Exemplo n.º 1
0
def rule_with_variables_with_parameters(
    column_Age_domain,
    column_Date_domain,
    single_part_name_parameter_container,
    multi_part_name_parameter_container,
):
    """
    Build "Rule" object that has "variables" and per-"Domain" parameters, but no builders of any kind.

    The "variables" hold one value, "false_positive_threshold" (addressable as
    "$variables.false_positive_threshold").  Parameters are attached directly to private "_parameters" attribute,
    keyed by "id" of each supplied "Domain" object:
      - "column_Age_domain.id" maps to "single_part_name_parameter_container";
      - "column_Date_domain.id" maps to "multi_part_name_parameter_container".
    """
    # Innermost node: carries actual variable value.
    threshold_node: ParameterNode = ParameterNode({"false_positive_threshold": 1.0e-2})

    # Root node: exposes node above under "variables" key.
    variables_root_node: ParameterNode = ParameterNode({"variables": threshold_node})

    variables_container = ParameterContainer(
        parameter_nodes={"variables": variables_root_node},
    )

    rule: Rule = Rule(
        name="rule_with_variables_with_parameters",
        variables=variables_container,
        domain_builder=None,
        parameter_builders=None,
        expectation_configuration_builders=None,
    )

    # Parameters are injected straight into private attribute (no "ParameterBuilder" objects are involved).
    parameters_by_domain_id = {}
    parameters_by_domain_id[column_Age_domain.id] = single_part_name_parameter_container
    parameters_by_domain_id[column_Date_domain.id] = multi_part_name_parameter_container
    rule._parameters = parameters_by_domain_id

    return rule
Exemplo n.º 2
0
def rule_without_variables_without_parameters():
    """
    Return bare "Rule" object: "variables" and all builder arguments are explicitly "None".

    Note: "name" given to "Rule" is "rule_with_no_variables_no_parameters", which differs from name of this function.
    """
    return Rule(
        name="rule_with_no_variables_no_parameters",
        variables=None,
        domain_builder=None,
        parameter_builders=None,
        expectation_configuration_builders=None,
    )
Exemplo n.º 3
0
def rule_without_parameters(empty_data_context):
    """
    Build "Rule" object with "ColumnDomainBuilder" and one "expect_my_validation" "ExpectationConfigurationBuilder".

    Neither "variables" nor "parameter_builders" are passed to "Rule" constructor, and no parameters are attached.

    :param empty_data_context: object handed to "ColumnDomainBuilder" as its "data_context"
    :return: constructed "Rule" (its "name" is "rule_with_no_variables_no_parameters")
    """
    # Helper is defined elsewhere; judging by its name, it skips on unsupported Python versions -- TODO confirm.
    skip_if_python_below_minimum_version()

    column_domain_builder = ColumnDomainBuilder(data_context=empty_data_context)
    validation_configuration_builder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_my_validation",
    )

    return Rule(
        name="rule_with_no_variables_no_parameters",
        domain_builder=column_domain_builder,
        expectation_configuration_builders=[validation_configuration_builder],
    )
Exemplo n.º 4
0
def rule_without_variables(
    empty_data_context,
    column_Age_domain,
    column_Date_domain,
    variables_multi_part_name_parameter_container,
    single_part_name_parameter_container,
    multi_part_name_parameter_container,
):
    """
    Build "Rule" object, named "rule_without_variables", whose "variables" argument is explicitly "None".

    The "Rule" combines "ColumnDomainBuilder" with single "expect_my_validation" "ExpectationConfigurationBuilder",
    whose "column" argument is fully-qualified reference to "column" entry of "Domain" kwargs.

    Only "empty_data_context" is used in body of this function; remaining arguments are accepted, but not referenced.
    """
    column_domain_builder = ColumnDomainBuilder(data_context=empty_data_context)

    # Reference to "column" key of "Domain" kwargs, assembled from shared naming constants.
    domain_column_reference = f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column"
    validation_configuration_builder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_my_validation",
        column=domain_column_reference,
    )

    return Rule(
        name="rule_without_variables",
        variables=None,
        domain_builder=column_domain_builder,
        expectation_configuration_builders=[validation_configuration_builder],
    )
Exemplo n.º 5
0
def rule_with_parameters(
    empty_data_context,
    column_Age_domain,
    column_Date_domain,
    variables_multi_part_name_parameter_container,
    single_part_name_parameter_container,
    multi_part_name_parameter_container,
):
    """
    Build "Rule" object, named "rule_with_parameters", and attach parameters for two "Domain" objects to it.

    The "Rule" combines "ColumnDomainBuilder" with single "expect_my_validation" "ExpectationConfigurationBuilder".
    Afterwards, private "_parameters" attribute is set, so that:
      - "column_Age_domain.id" maps to "single_part_name_parameter_container";
      - "column_Date_domain.id" maps to "multi_part_name_parameter_container".

    The "variables_multi_part_name_parameter_container" argument is accepted, but not referenced in body.
    """
    # Helper is defined elsewhere; judging by its name, it skips on unsupported Python versions -- TODO confirm.
    skip_if_python_below_minimum_version()

    column_domain_builder = ColumnDomainBuilder(data_context=empty_data_context)
    validation_configuration_builder = DefaultExpectationConfigurationBuilder(
        expectation_type="expect_my_validation",
    )
    rule: Rule = Rule(
        name="rule_with_parameters",
        domain_builder=column_domain_builder,
        expectation_configuration_builders=[validation_configuration_builder],
    )

    # Parameters are injected straight into private attribute (no "ParameterBuilder" objects are involved).
    parameters_by_domain_id = {}
    parameters_by_domain_id[column_Age_domain.id] = single_part_name_parameter_container
    parameters_by_domain_id[column_Date_domain.id] = multi_part_name_parameter_container
    rule._parameters = parameters_by_domain_id

    return rule
    def _build_table_rule() -> Rule:
        """
        Assemble "Rule" object, named "table_rule", which emits "expect_table_row_count_to_be_between"
        "ExpectationConfiguration" objects for table "Domain" type.

        :return: "Rule" comprised of "variables", "TableDomainBuilder", one metrics "ParameterBuilder" (table row count),
        and one "DefaultExpectationConfigurationBuilder"
        """
        # Step-1: Table-level "DomainBuilder" (no "data_context" is supplied here).
        domain_builder: TableDomainBuilder = TableDomainBuilder(data_context=None)

        # Step-2: Metrics "ParameterBuilder" -- table row count across batches.
        row_count_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_table_row_count_metric_multi_batch_parameter_builder(
            json_serialize=True,
        )

        # Step-3: "validation" "ParameterBuilder" -- numeric range for "table.row_count" metric.
        row_count_range_builder: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.build_numeric_metric_range_multi_batch_parameter_builder(
            metric_name="table.row_count",
            metric_value_kwargs=None,
            json_serialize=True,
        )

        # Step-4: Hand serialized "validation" "ParameterBuilder" to "DefaultExpectationConfigurationBuilder".
        validation_configs: Optional[List[ParameterBuilderConfig]] = [
            ParameterBuilderConfig(**row_count_range_builder.to_json_dict()),
        ]

        # Common prefix (range parameter name followed by separator) of every reference used below.
        range_reference_prefix: str = f"{row_count_range_builder.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}"

        row_count_expectation_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
            expectation_type="expect_table_row_count_to_be_between",
            validation_parameter_builder_configs=validation_configs,
            # Elements [0] and [1] of range value serve as lower and upper bounds, respectively.
            min_value=f"{range_reference_prefix}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[0]",
            max_value=f"{range_reference_prefix}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[1]",
            meta={
                "profiler_details": f"{range_reference_prefix}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
            },
        )

        # Step-5: Put all components together into "Rule" object.
        rule_variables: dict = {
            "false_positive_rate": 0.05,
            "quantile_statistic_interpolation_method": "auto",
            "estimator": "bootstrap",
            "n_resamples": 9999,
            "random_seed": None,
            "include_estimator_samples_histogram_in_details": False,
            "truncate_values": {
                "lower_bound": 0,
                "upper_bound": None,
            },
            "round_decimals": 0,
        }

        return Rule(
            name="table_rule",
            variables=rule_variables,
            domain_builder=domain_builder,
            parameter_builders=[row_count_builder_for_metrics],
            expectation_configuration_builders=[row_count_expectation_builder],
        )
    def _build_categorical_columns_rule() -> Rule:
        """
        Assemble "Rule" object, named "categorical_columns_rule", which emits
        "expect_column_unique_value_count_to_be_between" "ExpectationConfiguration" objects for categorical columns.

        :return: "Rule" comprised of "variables", "CategoricalColumnDomainBuilder", one metrics "ParameterBuilder"
        (column distinct values count), and one "DefaultExpectationConfigurationBuilder"
        """
        # Step-1: "DomainBuilder" selecting columns by cardinality; every filter other than "cardinality_limit_mode"
        # is passed explicitly as "None".
        domain_builder: CategoricalColumnDomainBuilder = CategoricalColumnDomainBuilder(
            include_column_names=None,
            exclude_column_names=None,
            include_column_name_suffixes=None,
            exclude_column_name_suffixes=None,
            semantic_type_filter_module_name=None,
            semantic_type_filter_class_name=None,
            include_semantic_types=None,
            exclude_semantic_types=None,
            allowed_semantic_types_passthrough=None,
            cardinality_limit_mode=CardinalityLimitMode.REL_100,
            max_unique_values=None,
            max_proportion_unique=None,
            data_context=None,
        )

        # Step-2: Metrics "ParameterBuilder" -- count of distinct column values across batches.
        distinct_count_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_distinct_values_count_metric_multi_batch_parameter_builder(
            json_serialize=True,
        )

        # Step-3: "validation" "ParameterBuilder" -- numeric range for "column.distinct_values.count" metric.
        distinct_count_range_builder: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.build_numeric_metric_range_multi_batch_parameter_builder(
            metric_name="column.distinct_values.count",
            metric_value_kwargs=None,
            json_serialize=True,
        )

        # Step-4: Hand serialized "validation" "ParameterBuilder" to "DefaultExpectationConfigurationBuilder".
        validation_configs: Optional[List[ParameterBuilderConfig]] = [
            ParameterBuilderConfig(**distinct_count_range_builder.to_json_dict()),
        ]

        # Common prefix (range parameter name followed by separator) of range-based references used below.
        range_reference_prefix: str = f"{distinct_count_range_builder.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}"

        unique_value_count_expectation_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
            expectation_type="expect_column_unique_value_count_to_be_between",
            validation_parameter_builder_configs=validation_configs,
            column=f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
            # Elements [0] and [1] of range value serve as lower and upper bounds, respectively.
            min_value=f"{range_reference_prefix}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[0]",
            max_value=f"{range_reference_prefix}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY}[1]",
            # Strictness flags refer to "strict_min" and "strict_max" entries of "variables" (declared below).
            strict_min=f"{VARIABLES_KEY}strict_min",
            strict_max=f"{VARIABLES_KEY}strict_max",
            meta={
                "profiler_details": f"{range_reference_prefix}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
            },
        )

        # Step-5: Put all components together into "Rule" object.
        rule_variables: dict = {
            "mostly": 1.0,
            "strict_min": False,
            "strict_max": False,
            "false_positive_rate": 0.05,
            "quantile_statistic_interpolation_method": "auto",
            "estimator": "bootstrap",
            "n_resamples": 9999,
            "random_seed": None,
            "include_estimator_samples_histogram_in_details": False,
            "truncate_values": {
                "lower_bound": 0.0,
                "upper_bound": None,
            },
            "round_decimals": 1,
        }

        return Rule(
            name="categorical_columns_rule",
            variables=rule_variables,
            domain_builder=domain_builder,
            parameter_builders=[distinct_count_builder_for_metrics],
            expectation_configuration_builders=[unique_value_count_expectation_builder],
        )
Exemplo n.º 8
0
def build_map_metric_rule(
    rule_name: str,
    expectation_type: str,
    map_metric_name: str,
    include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
    exclude_column_names: Optional[Union[str, Optional[List[str]]]] = None,
    include_column_name_suffixes: Optional[Union[str, Iterable,
                                                 List[str]]] = None,
    exclude_column_name_suffixes: Optional[Union[str, Iterable,
                                                 List[str]]] = None,
    semantic_type_filter_module_name: Optional[str] = None,
    semantic_type_filter_class_name: Optional[str] = None,
    include_semantic_types: Optional[Union[
        str, SemanticDomainTypes, List[Union[str,
                                             SemanticDomainTypes]]]] = None,
    exclude_semantic_types: Optional[Union[
        str, SemanticDomainTypes, List[Union[str,
                                             SemanticDomainTypes]]]] = None,
    max_unexpected_values: Union[str, int] = 0,
    max_unexpected_ratio: Optional[Union[str, float]] = None,
    min_max_unexpected_values_proportion: Union[str, float] = 9.75e-1,
) -> Rule:
    """
    This method builds "Rule" object focused on emitting "ExpectationConfiguration" objects for any "map" style metric.

    :param rule_name: name given to resulting "Rule" object
    :param expectation_type: "expectation_type" of "ExpectationConfiguration" objects to be emitted
    :param map_metric_name: name of "map" style metric; passed to "MapMetricColumnDomainBuilder" and to
    "MeanUnexpectedMapMetricMultiBatchParameterBuilder", and used as prefix of name of latter
    ("<map_metric_name>.unexpected_value")

    All remaining arguments ("include_column_names", "exclude_column_names", "include_column_name_suffixes",
    "exclude_column_name_suffixes", "semantic_type_filter_module_name", "semantic_type_filter_class_name",
    "include_semantic_types", "exclude_semantic_types", "max_unexpected_values", "max_unexpected_ratio", and
    "min_max_unexpected_values_proportion") are forwarded unchanged to "MapMetricColumnDomainBuilder".

    :return: "Rule" comprised of "variables" (containing "success_ratio"), "domain_builder", three metrics
    "ParameterBuilder" objects, and one "DefaultExpectationConfigurationBuilder"
    """

    # Step-1: Instantiate "MapMetricColumnDomainBuilder" for specified "map_metric_name" (subject to directives).

    domain_builder: MapMetricColumnDomainBuilder = MapMetricColumnDomainBuilder(
        map_metric_name=map_metric_name,
        include_column_names=include_column_names,
        exclude_column_names=exclude_column_names,
        include_column_name_suffixes=include_column_name_suffixes,
        exclude_column_name_suffixes=exclude_column_name_suffixes,
        semantic_type_filter_module_name=semantic_type_filter_module_name,
        semantic_type_filter_class_name=semantic_type_filter_class_name,
        include_semantic_types=include_semantic_types,
        exclude_semantic_types=exclude_semantic_types,
        max_unexpected_values=max_unexpected_values,
        max_unexpected_ratio=max_unexpected_ratio,
        min_max_unexpected_values_proportion=min_max_unexpected_values_proportion,
        data_context=None,
    )

    # Step-2: Declare "ParameterBuilder" for every metric of interest.

    unique_unexpected_count_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_unique_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True)
    nonnull_unexpected_count_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True)
    null_unexpected_count_builder_for_metrics: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_null_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=True)

    # Step-3: Set up "MeanUnexpectedMapMetricMultiBatchParameterBuilder" to compute "condition" for emitting "ExpectationConfiguration" (based on "Domain" data).

    # "evaluation" builders are requested separately from Step-2 builders, with "json_serialize=False".
    total_count_builder_for_evaluations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_table_row_count_metric_multi_batch_parameter_builder(
        json_serialize=False)
    nonnull_unexpected_count_builder_for_evaluations: ParameterBuilder = DataAssistant.commonly_used_parameter_builders.get_column_values_nonnull_unexpected_count_metric_multi_batch_parameter_builder(
        json_serialize=False)
    evaluation_parameter_builder_configs: Optional[List[ParameterBuilderConfig]] = [
        ParameterBuilderConfig(**total_count_builder_for_evaluations.to_json_dict()),
        ParameterBuilderConfig(**nonnull_unexpected_count_builder_for_evaluations.to_json_dict()),
    ]
    mean_unexpected_value_builder_for_validations: MeanUnexpectedMapMetricMultiBatchParameterBuilder = MeanUnexpectedMapMetricMultiBatchParameterBuilder(
        name=f"{map_metric_name}.unexpected_value",
        map_metric_name=map_metric_name,
        total_count_parameter_builder_name=total_count_builder_for_evaluations.name,
        # NOTE(review): "nonnull unexpected count" builder is supplied as null count source -- presumably because
        # values that fail "nonnull" condition are null values; confirm against "ParameterBuilder" implementation.
        null_count_parameter_builder_name=nonnull_unexpected_count_builder_for_evaluations.name,
        metric_domain_kwargs=DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME,
        metric_value_kwargs=None,
        evaluation_parameter_builder_configs=evaluation_parameter_builder_configs,
        json_serialize=True,
        data_context=None,
    )

    # Step-4: Pass "MeanUnexpectedMapMetricMultiBatchParameterBuilder" as "validation" "ParameterBuilder" for "DefaultExpectationConfigurationBuilder", responsible for emitting "ExpectationConfiguration" (with specified "expectation_type").

    validation_parameter_builder_configs: Optional[List[ParameterBuilderConfig]] = [
        ParameterBuilderConfig(**mean_unexpected_value_builder_for_validations.to_json_dict()),
    ]

    # Common prefix (parameter name followed by separator) of references to computed mean unexpected value.
    mean_unexpected_value_reference_prefix: str = f"{mean_unexpected_value_builder_for_validations.fully_qualified_parameter_name}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}"

    expectation_configuration_builder: DefaultExpectationConfigurationBuilder = DefaultExpectationConfigurationBuilder(
        expectation_type=expectation_type,
        validation_parameter_builder_configs=validation_parameter_builder_configs,
        column=f"{DOMAIN_KWARGS_PARAMETER_FULLY_QUALIFIED_NAME}{FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER}column",
        # "condition" compares mean unexpected value with complement of "success_ratio" variable (declared below).
        condition=f"{mean_unexpected_value_reference_prefix}{FULLY_QUALIFIED_PARAMETER_NAME_VALUE_KEY} <= 1.0 - {VARIABLES_KEY}success_ratio",
        meta={
            # Separator constant is used here (instead of literal "."), consistently with all other references.
            "profiler_details": f"{mean_unexpected_value_reference_prefix}{FULLY_QUALIFIED_PARAMETER_NAME_METADATA_KEY}",
        },
    )

    # Step-5: Instantiate and return "Rule" object, comprised of "variables", "domain_builder", "parameter_builders", and "expectation_configuration_builders" components.

    variables: dict = {
        "success_ratio": 7.5e-1,
    }
    parameter_builders: List[ParameterBuilder] = [
        unique_unexpected_count_builder_for_metrics,
        nonnull_unexpected_count_builder_for_metrics,
        null_unexpected_count_builder_for_metrics,
    ]
    expectation_configuration_builders: List[ExpectationConfigurationBuilder] = [
        expectation_configuration_builder,
    ]

    return Rule(
        name=rule_name,
        variables=variables,
        domain_builder=domain_builder,
        parameter_builders=parameter_builders,
        expectation_configuration_builders=expectation_configuration_builders,
    )
Exemplo n.º 9
0
    def _build_profiler(self) -> None:
        """
        Builds "RuleBasedProfiler", corresponding to present DataAssistant use case.

        Starts with empty "RuleBasedProfiler" (initialized in constructor) and adds Rule objects.

        Subclasses can add custom "Rule" objects as appropriate for their respective particular DataAssistant use cases.

        Operates through side effects only: "Rule" objects are added to "self.profiler", and "self.profiler.variables"
        is replaced by result of reconciling "variables" collected from all per-"Expectation" profilers.
        """
        # Accumulates "variables" of every per-"Expectation" profiler (later entries override earlier ones).
        variables: dict = {}

        # For each Self-Initializing "Expectation" as specified by "DataAssistant.expectation_kwargs_by_expectation_type"
        # interface property, retrieve its "RuleBasedProfiler" configuration, construct "Rule" object based on it, while
        # incorporating metrics "ParameterBuilder" objects for "MetricDomainTypes", emitted by "DomainBuilder"
        # of comprised "Rule", specified by "DataAssistant.metrics_parameter_builders_by_domain" interface property.
        # Append this "Rule" object to overall DataAssistant "RuleBasedProfiler" object; incorporate "variables" as well.
        profiler: Optional[BaseRuleBasedProfiler]
        rule: Rule
        expectation_type: str
        expectation_kwargs: Dict[str, Any]
        for (
            expectation_type,
            expectation_kwargs,
        ) in self.expectation_kwargs_by_expectation_type.items():
            # Builder method returns callable, which is then invoked with "Expectation" kwargs to obtain profiler.
            profiler = self._validator.build_rule_based_profiler_for_expectation(
                expectation_type=expectation_type
            )(**expectation_kwargs)
            variables.update(convert_variables_to_dict(variables=profiler.variables))

            for rule in profiler.rules:
                domain_builder: DomainBuilder = rule.domain_builder

                # Copy before extending: "list.extend()" works in place, so extending list returned by source "Rule"
                # could otherwise modify that "Rule" (and accumulate duplicates, if it is ever processed again).
                parameter_builders: List[ParameterBuilder] = list(
                    rule.parameter_builders or []
                )
                parameter_builders.extend(
                    self.metrics_parameter_builders_by_domain[
                        Domain(
                            domain_builder.domain_type,
                        )
                    ]
                )

                expectation_configuration_builders: List[
                    ExpectationConfigurationBuilder
                ] = (rule.expectation_configuration_builders or [])

                self.profiler.add_rule(
                    rule=Rule(
                        name=rule.name,
                        variables=rule.variables,
                        domain_builder=domain_builder,
                        parameter_builders=parameter_builders,
                        expectation_configuration_builders=expectation_configuration_builders,
                    )
                )

        self.profiler.variables = self.profiler.reconcile_profiler_variables(
            variables=variables,
            reconciliation_strategy=DEFAULT_RECONCILATION_DIRECTIVES.variables,
        )