示例#1
0
 def process_batch_parameters(
     self, query_parameters=None, limit=None, dataset_options=None
 ):
     """Build batch kwargs from the parent class and attach the query parameters.

     :param query_parameters: value stored under the "query_parameters" key
     :param limit: forwarded unchanged to the parent implementation
     :param dataset_options: forwarded unchanged to the parent implementation
     :return: the batch kwargs produced by the parent, updated in place
     """
     parent_kwargs = super().process_batch_parameters(
         limit=limit, dataset_options=dataset_options
     )
     query_overlay = {"query_parameters": query_parameters}
     nested_update(parent_kwargs, query_overlay)
     return parent_kwargs
示例#2
0
    def get_evaluation_parameter_dependencies(self):
        """Collect the metrics that this object's evaluation parameters depend on.

        Every kwarg whose value is a dict holding a "$PARAMETER" entry that
        starts with the great_expectations validations URN prefix is parsed;
        the resulting suite name / metric name (optionally grouped under a
        "metric_kwargs_id" mapping) is merged into the returned dictionary.
        URNs that cannot be parsed are logged and skipped.

        :return: dictionary keyed by expectation suite name
        """
        urn_prefix = "urn:great_expectations:validations:"
        dependencies = {}

        for _kwarg_name, kwarg_value in self.kwargs.items():
            # Only dict-valued kwargs can carry an evaluation parameter.
            if not (isinstance(kwarg_value, dict) and "$PARAMETER" in kwarg_value):
                continue

            parameter = kwarg_value["$PARAMETER"]
            if not parameter.startswith(urn_prefix):
                continue

            try:
                parsed_urn = parse_evaluation_parameter_urn(parameter)
            except ParserError:
                logger.warning(
                    "Unable to parse great_expectations urn {}".format(parameter)
                )
                continue

            if parsed_urn.metric_kwargs_id is None:
                suite_metrics = [parsed_urn.metric_name]
            else:
                suite_metrics = [
                    {
                        "metric_kwargs_id": {
                            parsed_urn.metric_kwargs_id: [parsed_urn.metric_name]
                        }
                    }
                ]

            nested_update(
                dependencies,
                {parsed_urn.expectation_suite_name: suite_metrics},
            )

        return dependencies
def test_nested_update():
    # A nested dictionary (here: reader_options inside batch_kwargs) must be
    # merged key by key instead of being replaced wholesale.
    actual = {
        "path": "/a/path",
        "reader_method": "read_csv",
        "reader_options": {"header": 0},
    }
    overlay = {"reader_options": {"nrows": 1}}

    nested_update(actual, overlay)

    expected = {
        "path": "/a/path",
        "reader_method": "read_csv",
        "reader_options": {"header": 0, "nrows": 1},
    }
    assert actual == expected
示例#4
0
def reconcile_rule_variables(
    variables: ParameterContainer,
    variables_config: dict,
    reconciliation_strategy: ReconciliationStrategy = DEFAULT_RECONCILATION_DIRECTIVES.variables,
) -> dict:
    """
    Combine the variables instantiated from a Rule configuration (e.g., stored in a YAML file managed by the
    Profiler store) with a variables override, possibly supplied as part of the candidate override rule
    configuration.

    The reconciliation logic for "variables" is of the "replace" nature: an override value complements the
    original on key "miss", and replaces the original on key "hit" (or "collision"), because "variables" is a
    unique member for a Rule.

    :param variables: existing variables of a Rule
    :param variables_config: variables configuration override, supplied in dictionary (configuration) form
    :param reconciliation_strategy: one of NESTED_UPDATE, REPLACE, or UPDATE; any other value leaves the
        existing variables untouched
    :return: reconciled variables configuration, returned in dictionary (configuration) form
    """
    existing_config: dict = convert_variables_to_dict(variables=variables)

    # Nothing to reconcile when no override was supplied.
    if not variables_config:
        return existing_config

    if reconciliation_strategy == ReconciliationStrategy.NESTED_UPDATE:
        return nested_update(existing_config, variables_config)

    if reconciliation_strategy == ReconciliationStrategy.REPLACE:
        return variables_config

    if reconciliation_strategy == ReconciliationStrategy.UPDATE:
        existing_config.update(variables_config)

    return existing_config
def test_nested_update_lists():
    # When both sides hold a list under the same nested key, nested_update
    # extends the existing list; keys only present on one side are kept as is.
    actual = {
        "suite.warning": {"metric.name": ["column=foo"]},
        "suite.failure": {"metric.blarg": [""]},
    }
    incoming = {
        "suite.warning": {
            "metric.other_name": ["column=foo"],
            "metric.name": ["column=bar"],
        }
    }

    nested_update(actual, incoming)

    expected = {
        "suite.warning": {
            "metric.name": ["column=foo", "column=bar"],
            "metric.other_name": ["column=foo"],
        },
        "suite.failure": {"metric.blarg": [""]},
    }
    assert actual == expected
示例#6
0
def get_substituted_validation_dict(
    substituted_runtime_config: dict, validation_dict: dict
) -> dict:
    """Merge one validation's settings with the shared runtime configuration.

    Values present on ``validation_dict`` take precedence; missing (or falsy)
    values fall back to ``substituted_runtime_config``.  Dictionary-valued
    settings ("evaluation_parameters", "runtime_configuration") are merged
    with ``nested_update``; the action lists are combined by
    ``get_updated_action_list``.

    :param substituted_runtime_config: runtime configuration shared by all validations; not modified
    :param validation_dict: configuration of a single validation
    :return: the combined validation dictionary, after passing ``validate_validation_dict``
    """
    import copy  # local import keeps this block self-contained

    # nested_update() modifies its first argument in place.  Merging straight
    # into the dictionaries owned by substituted_runtime_config would leak this
    # validation's overrides into the shared configuration (and therefore into
    # every validation processed afterwards), so merge into private copies.
    base_evaluation_parameters = copy.deepcopy(
        substituted_runtime_config.get("evaluation_parameters") or {}
    )
    base_runtime_configuration = copy.deepcopy(
        substituted_runtime_config.get("runtime_configuration") or {}
    )

    substituted_validation_dict = {
        "batch_request": get_substituted_batch_request(
            substituted_runtime_config=substituted_runtime_config,
            validation_batch_request=validation_dict.get("batch_request"),
        ),
        "expectation_suite_name": validation_dict.get("expectation_suite_name")
        or substituted_runtime_config.get("expectation_suite_name"),
        "expectation_suite_ge_cloud_id": validation_dict.get(
            "expectation_suite_ge_cloud_id"
        )
        or substituted_runtime_config.get("expectation_suite_ge_cloud_id"),
        "action_list": get_updated_action_list(
            base_action_list=substituted_runtime_config.get("action_list"),
            other_action_list=validation_dict.get("action_list", {}),
        ),
        "evaluation_parameters": nested_update(
            base_evaluation_parameters,
            validation_dict.get("evaluation_parameters", {}),
            dedup=True,
        ),
        "runtime_configuration": nested_update(
            base_runtime_configuration,
            validation_dict.get("runtime_configuration", {}),
            dedup=True,
        ),
        "include_rendered_content": validation_dict.get("include_rendered_content")
        or substituted_runtime_config.get("include_rendered_content")
        or False,
    }
    # "name" is optional and only carried over when the validation defines it.
    if validation_dict.get("name") is not None:
        substituted_validation_dict["name"] = validation_dict["name"]
    validate_validation_dict(substituted_validation_dict)
    return substituted_validation_dict
        def usage_statistics_wrapped_method(*args, **kwargs):
            """Call the wrapped function and emit a usage-statistics message for it.

            The message records the event name, whether the call succeeded, and
            any payload produced by the optional argument/result payload
            functions.  The call duration (milliseconds) is exposed on the
            handler as a temporary attribute while the message is emitted.
            """
            # A `dry_run` call (e.g. of `build_data_docs()`) must not emit
            # usage statistics: just run the function and hand back its result.
            if kwargs.get("dry_run"):
                return func(*args, **kwargs)

            # Created up front so both payload functions can contribute to it.
            event_payload = {}
            message = {"event_payload": event_payload, "event": event_name}
            result = None
            time_begin: int = int(round(time.time() * 1000))
            try:
                if args_payload_fn is not None:
                    args_payload = args_payload_fn(*args, **kwargs)
                    nested_update(event_payload, args_payload)

                result = func(*args, **kwargs)
                message["success"] = True
            except Exception:
                message["success"] = False
                raise
            finally:
                if result is not None and result_payload_fn is not None:
                    nested_update(event_payload, result_payload_fn(result))

                time_end: int = int(round(time.time() * 1000))
                delta_t: int = time_end - time_begin

                handler = get_usage_statistics_handler(list(args))
                if handler:
                    # The duration lives on the handler only for the emit call.
                    event_duration_property_name: str = (
                        f"{event_name}.duration".replace(".", "_")
                    )
                    setattr(handler, event_duration_property_name, delta_t)
                    handler.emit(message)
                    delattr(handler, event_duration_property_name)

            return result
示例#8
0
    def get_evaluation_parameter_dependencies(self):
        """Collect the metrics referenced by URN-style evaluation parameters.

        Every kwarg whose value is a dict with a "$PARAMETER" entry is scanned
        with ``find_evaluation_parameter_dependencies``; the URNs found that way
        are parsed and merged into a dictionary keyed by expectation suite name.
        URNs that cannot be parsed are logged and skipped.

        :return: the merged dependencies after
            ``_deduplicate_evaluation_parameter_dependencies`` has been applied
        """
        parsed_dependencies = {}
        for value in self.kwargs.values():
            if isinstance(value, dict) and "$PARAMETER" in value:
                param_string_dependencies = find_evaluation_parameter_dependencies(
                    value["$PARAMETER"]
                )
                nested_update(parsed_dependencies, param_string_dependencies)

        dependencies = {}
        urns = parsed_dependencies.get("urns", [])
        for string_urn in urns:
            try:
                urn = ge_urn.parseString(string_urn)
            except ParserError:
                # Report the URN that actually failed.  The previous code read
                # the leftover `value` of the loop above, which named the wrong
                # parameter and could itself raise when the last kwarg was not
                # a "$PARAMETER" dictionary.
                logger.warning(
                    "Unable to parse great_expectations urn {}".format(string_urn)
                )
                continue

            if not urn.get("metric_kwargs"):
                # No metric kwargs: the metric is listed directly under the suite.
                nested_update(
                    dependencies,
                    {urn["expectation_suite_name"]: [urn["metric_name"]]},
                )
            else:
                # Metric kwargs present: group the metric under its kwargs id.
                nested_update(
                    dependencies,
                    {
                        urn["expectation_suite_name"]: [
                            {
                                "metric_kwargs_id": {
                                    urn["metric_kwargs"]: [urn["metric_name"]]
                                }
                            }
                        ]
                    },
                )

        dependencies = _deduplicate_evaluation_parameter_dependencies(dependencies)
        return dependencies
示例#9
0
    def __init__(
        self,
        data_context,
        store_backend,
        site_name=None,
        site_index_builder=None,
        show_how_to_buttons=True,
        site_section_builders=None,
        runtime_environment=None,
        ge_cloud_mode=False,
        **kwargs,
    ) -> None:
        """Set up the target store, the section builders and the index builder.

        :param data_context: object supplying ``anonymous_usage_statistics``,
            ``plugins_directory``, ``expectations_store_name`` and
            ``validations_store_name``
        :param store_backend: backend configuration handed to the target store
        :param site_name: name stored on the instance and passed to the index builder
        :param site_index_builder: index builder configuration; defaults to
            ``{"class_name": "DefaultSiteIndexBuilder"}``
        :param show_how_to_buttons: flag passed on to every builder
        :param site_section_builders: per-section overrides merged onto the
            default "expectations" / "validations" / "profiling" configuration
        :param runtime_environment: forwarded to the target store
        :param ge_cloud_mode: selects ``JsonSiteStore`` (True) or ``HtmlSiteStore`` (False)
        :param kwargs: accepted but not used by this method
        :raises exceptions.ClassInstantiationError: when a section builder or
            the index builder cannot be instantiated
        """
        self.site_name = site_name
        self.data_context = data_context
        self.store_backend = store_backend
        self.show_how_to_buttons = show_how_to_buttons
        self.ge_cloud_mode = ge_cloud_mode

        # The data context id is only recorded when usage statistics are enabled.
        usage_statistics_config = data_context.anonymous_usage_statistics
        data_context_id = None
        if (
            usage_statistics_config
            and usage_statistics_config.enabled
            and usage_statistics_config.data_context_id
        ):
            data_context_id = usage_statistics_config.data_context_id

        self.data_context_id = data_context_id

        # set custom_styles_directory / custom_views_directory if present
        custom_styles_directory = None
        custom_views_directory = None
        plugins_directory = data_context.plugins_directory
        if plugins_directory:
            styles_candidate = os.path.join(
                plugins_directory, "custom_data_docs", "styles"
            )
            if os.path.isdir(styles_candidate):
                custom_styles_directory = styles_candidate

            views_candidate = os.path.join(
                plugins_directory, "custom_data_docs", "views"
            )
            if os.path.isdir(views_candidate):
                custom_views_directory = views_candidate

        if site_index_builder is None:
            site_index_builder = {"class_name": "DefaultSiteIndexBuilder"}

        # The site builder is essentially a frontend store. We'll open up
        # three types of backends using the base
        # type of the configuration defined in the store_backend section

        if ge_cloud_mode:
            self.target_store = JsonSiteStore(
                store_backend=store_backend, runtime_environment=runtime_environment
            )
        else:
            self.target_store = HtmlSiteStore(
                store_backend=store_backend, runtime_environment=runtime_environment
            )

        default_site_section_builders_config = {
            "expectations": {
                "class_name": "DefaultSiteSectionBuilder",
                "source_store_name": data_context.expectations_store_name,
                "renderer": {"class_name": "ExpectationSuitePageRenderer"},
            },
            "validations": {
                "class_name": "DefaultSiteSectionBuilder",
                "source_store_name": data_context.validations_store_name,
                "renderer": {"class_name": "ValidationResultsPageRenderer"},
                "validation_results_limit": site_index_builder.get(
                    "validation_results_limit"
                ),
            },
            "profiling": {
                "class_name": "DefaultSiteSectionBuilder",
                "source_store_name": data_context.validations_store_name,
                "renderer": {"class_name": "ProfilingResultsPageRenderer"},
            },
        }

        if site_section_builders is None:
            site_section_builders = default_site_section_builders_config
        else:
            site_section_builders = nested_update(
                default_site_section_builders_config, site_section_builders
            )

        # set default run_name_filter
        if site_section_builders.get("validations", "None") not in FALSEY_YAML_STRINGS:
            if site_section_builders["validations"].get("run_name_filter") is None:
                site_section_builders["validations"]["run_name_filter"] = {
                    "not_includes": "profiling"
                }
        if site_section_builders.get("profiling", "None") not in FALSEY_YAML_STRINGS:
            if site_section_builders["profiling"].get("run_name_filter") is None:
                site_section_builders["profiling"]["run_name_filter"] = {
                    "includes": "profiling"
                }

        self.site_section_builders = {}
        for site_section_name, site_section_config in site_section_builders.items():
            # Sections that are empty or set to a falsey YAML string are skipped.
            if not site_section_config or site_section_config in FALSEY_YAML_STRINGS:
                continue
            module_name = (
                site_section_config.get("module_name")
                or "great_expectations.render.renderer.site_builder"
            )
            self.site_section_builders[
                site_section_name
            ] = instantiate_class_from_config(
                config=site_section_config,
                runtime_environment={
                    "data_context": data_context,
                    "target_store": self.target_store,
                    "custom_styles_directory": custom_styles_directory,
                    "custom_views_directory": custom_views_directory,
                    "data_context_id": self.data_context_id,
                    "show_how_to_buttons": self.show_how_to_buttons,
                    "ge_cloud_mode": self.ge_cloud_mode,
                },
                config_defaults={
                    "name": site_section_name,
                    "module_name": module_name,
                },
            )
            if not self.site_section_builders[site_section_name]:
                raise exceptions.ClassInstantiationError(
                    module_name=module_name,
                    package_name=None,
                    class_name=site_section_config["class_name"],
                )

        module_name = (
            site_index_builder.get("module_name")
            or "great_expectations.render.renderer.site_builder"
        )
        class_name = site_index_builder.get("class_name") or "DefaultSiteIndexBuilder"
        self.site_index_builder = instantiate_class_from_config(
            config=site_index_builder,
            runtime_environment={
                "data_context": data_context,
                "custom_styles_directory": custom_styles_directory,
                "custom_views_directory": custom_views_directory,
                "show_how_to_buttons": self.show_how_to_buttons,
                "target_store": self.target_store,
                "site_name": self.site_name,
                "data_context_id": self.data_context_id,
                "source_stores": {
                    section_name: section_config.get("source_store_name")
                    for (section_name, section_config) in site_section_builders.items()
                    if section_config not in FALSEY_YAML_STRINGS
                },
                "site_section_builders_config": site_section_builders,
                "ge_cloud_mode": self.ge_cloud_mode,
            },
            config_defaults={
                "name": "site_index_builder",
                "module_name": module_name,
                "class_name": class_name,
            },
        )
        if not self.site_index_builder:
            # Use the resolved class name: the configuration may legitimately
            # omit "class_name" (a default is supplied above), and indexing the
            # config here would replace the intended error with a KeyError.
            raise exceptions.ClassInstantiationError(
                module_name=module_name,
                package_name=None,
                class_name=class_name,
            )
示例#10
0
def substitute_runtime_config(source_config: dict, runtime_kwargs: dict) -> dict:
    """Overlay runtime keyword arguments on top of a source configuration.

    When ``runtime_kwargs`` is empty or holds only falsy values, the source
    configuration is returned as is.  Otherwise a deep copy is made and:

    * scalar settings are replaced,
    * "batch_request", "evaluation_parameters" and "runtime_configuration"
      are merged with ``nested_update``,
    * "action_list" is combined with ``get_updated_action_list``,
    * "validations" and "profilers" are extended with the runtime entries
      that are not already part of the source configuration.

    :param source_config: base configuration; never modified
    :param runtime_kwargs: overrides; entries set to None are ignored
    :return: ``source_config`` itself or an updated deep copy of it
    """
    if not (runtime_kwargs and any(runtime_kwargs.values())):
        return source_config

    dest_config: dict = copy.deepcopy(source_config)

    # replace
    for replaced_key in (
        "template_name",
        "run_name_template",
        "expectation_suite_name",
        "expectation_suite_ge_cloud_id",
    ):
        if runtime_kwargs.get(replaced_key) is not None:
            dest_config[replaced_key] = runtime_kwargs[replaced_key]

    # update
    if runtime_kwargs.get("batch_request") is not None:
        runtime_batch_request = get_batch_request_as_dict(
            batch_request=runtime_kwargs["batch_request"]
        )
        dest_config["batch_request"] = nested_update(
            dest_config.get("batch_request") or {},
            runtime_batch_request,
            dedup=True,
        )

    if runtime_kwargs.get("action_list") is not None:
        dest_config["action_list"] = get_updated_action_list(
            base_action_list=dest_config.get("action_list") or [],
            other_action_list=runtime_kwargs["action_list"],
        )

    for merged_key in ("evaluation_parameters", "runtime_configuration"):
        if runtime_kwargs.get(merged_key) is not None:
            dest_config[merged_key] = nested_update(
                dest_config.get(merged_key) or {},
                runtime_kwargs[merged_key],
                dedup=True,
            )

    # extend, skipping entries the source configuration already contains
    for extended_key in ("validations", "profilers"):
        if runtime_kwargs.get(extended_key) is not None:
            combined = dest_config.get(extended_key) or []
            already_present = source_config.get(extended_key) or []
            combined.extend(
                entry
                for entry in runtime_kwargs[extended_key]
                if entry not in already_present
            )
            dest_config[extended_key] = combined

    return dest_config
 def _get_theme(theme: Optional[Dict[str, Any]]) -> Dict[str, Any]:
     """Return a copy of the default Altair theme, overlaid with ``theme`` when given.

     :param theme: optional overrides merged onto the default theme
     :return: the resulting theme dictionary
     """
     # Work on a copy so the shared default theme is never modified.
     base_theme: Dict[str, Any] = copy.deepcopy(AltairThemes.DEFAULT_THEME.value)
     if not theme:
         return base_theme
     return nested_update(base_theme, theme)
示例#12
0
 def process_batch_parameters(self, query_parameters=None, limit=None):
     """Build batch kwargs from the parent class and attach the query parameters.

     :param query_parameters: value stored under the "query_parameters" key
     :param limit: forwarded unchanged to the parent implementation
     :return: the batch kwargs produced by the parent, updated in place
     """
     parent = super(SqlAlchemyDatasource, self)
     parent_kwargs = parent.process_batch_parameters(limit=limit)
     query_overlay = {"query_parameters": query_parameters}
     nested_update(parent_kwargs, query_overlay)
     return parent_kwargs
    def _reconcile_rule_expectation_configuration_builder_configs(
            rule: Rule, expectation_configuration_builder_configs: List[dict]
    ) -> List[dict]:
        """
        Rule "expectation configuration builders" reconciliation involves combining the expectation configuration
        builders, instantiated from Rule configuration (e.g., stored in a YAML file managed by the Profiler store),
        with the expectation configuration builder overrides, possibly supplied as part of the candidate override
        rule configuration.

        The reconciliation logic for "expectation configuration builders" is of the "upsert" nature: a candidate
        override configuration contributes to the expectation configuration builders list of the rule if the
        corresponding entry does not exist among the instantiated expectation configuration builders of the rule;
        otherwise, it is combined (through "nested_update") with the configuration of the existing entry.

        :param rule: Profiler "rule", subject to expectation configuration builder overrides
        :param expectation_configuration_builder_configs: expectation configuration builder configuration
            overrides, supplied in dictionary (configuration) form
        :return: reconciled expectation configuration builder configurations, in dictionary (configuration) form
        """
        # Every override is validated before any reconciliation work starts.
        override_config: dict
        for override_config in expectation_configuration_builder_configs:
            _validate_builder_override_config(builder_config=override_config)

        existing_builders: Dict[str, ExpectationConfigurationBuilder] = (
            rule._get_expectation_configuration_builders_as_dict()
        )

        reconciled_configs: Dict[str, dict] = {}

        builder_key: str
        builder: ExpectationConfigurationBuilder
        for builder_key, builder in existing_builders.items():
            builder_as_dict: dict = builder.to_dict()
            builder_as_dict["class_name"] = builder.__class__.__name__
            builder_as_dict["module_name"] = builder.__class__.__module__

            # Roundtrip through schema validation to add/or restore any missing fields.
            loaded_config: ExpectationConfigurationBuilderConfig = (
                expectationConfigurationBuilderConfigSchema.load(builder_as_dict)
            )
            reconciled_configs[builder_key] = loaded_config.to_dict()

        # Overrides are keyed by their "expectation_type" before being merged in.
        overrides_by_type: Dict[str, dict] = {
            candidate["expectation_type"]: candidate
            for candidate in expectation_configuration_builder_configs
        }
        reconciled_configs = nested_update(
            reconciled_configs,
            overrides_by_type,
            dedup=True,
        )

        return list(reconciled_configs.values()) if reconciled_configs else []