Exemplo n.º 1
0
    def _list_data_assets_validated_by_batch_id(self) -> dict:
        if self._data_assets_validated_by_batch_id is None:
            assets_validated_by_batch_id = {}

            for validation_result in self.list_validation_results():
                batch_kwargs = validation_result.meta["batch_kwargs"]
                batch_id = BatchKwargs(batch_kwargs).to_id()
                expectation_suite_name = validation_result.meta[
                    "expectation_suite_name"
                ]
                if batch_id not in assets_validated_by_batch_id:
                    assets_validated_by_batch_id[batch_id] = {
                        "batch_kwargs": batch_kwargs,
                        "validation_results": [validation_result],
                        "expectation_suite_names": [expectation_suite_name],
                    }
                else:
                    assets_validated_by_batch_id[batch_id]["validation_results"].append(
                        validation_result
                    )
                    assets_validated_by_batch_id[batch_id][
                        "expectation_suite_names"
                    ].append(expectation_suite_name)
            self._data_assets_validated_by_batch_id = assets_validated_by_batch_id
        return self._data_assets_validated_by_batch_id
Exemplo n.º 2
0
    def render(
        self,
        validation_result=None,
        data_docs_pages=None,
        notify_with=None,
    ):
        """Build a plain-text summary of a validation result.

        Args:
            validation_result: Object exposing ``meta``, ``statistics`` and
                ``success``. When it is falsy, a fixed "no validation
                occurred" message is returned.
            data_docs_pages: Accepted but not used by this method.
            notify_with: Accepted but not used by this method.

        Returns:
            The summary as one multi-line string (status, suite name, data
            asset name, run id, batch id and the pass count).
        """
        if not validation_result:
            return (
                "No validation occurred. Please ensure you passed a validation_result."
            )

        meta = validation_result.meta
        suite_name = meta.get(
            "expectation_suite_name", "__no_expectation_suite_name__")

        # The data asset name is read from batch_kwargs when present, else
        # from active_batch_definition; otherwise a placeholder is used.
        asset_placeholder = "__no_data_asset_name__"
        if "batch_kwargs" in meta:
            asset_name = meta["batch_kwargs"].get(
                "data_asset_name", asset_placeholder)
        elif "active_batch_definition" in meta:
            asset_name = (
                meta["active_batch_definition"].data_asset_name
                or asset_placeholder)
        else:
            asset_name = asset_placeholder

        stats = validation_result.statistics
        passed = stats["successful_expectations"]
        evaluated = stats["evaluated_expectations"]
        run_id = meta.get("run_id", "__no_run_id__")
        batch_id = BatchKwargs(meta.get("batch_kwargs", {})).to_id()
        outcome = "Success 🎉" if validation_result.success else "Failed ❌"

        report_lines = (
            f"Batch Validation Status: {outcome}",
            f"Expectation suite name: {suite_name}",
            f"Data asset name: {asset_name}",
            f"Run ID: {run_id}",
            f"Batch ID: {batch_id}",
            f"Summary: {passed} of {evaluated} expectations were met",
        )
        return "\n".join(report_lines)
Exemplo n.º 3
0
    def _run_suite(
        self,
        dataset_name: str,
        dataset_path: Optional[str],
        df: Any,
        target_expectation_suite_name: str,
        run_id: str,
    ):
        """Validate ``df`` against a named expectation suite.

        The data is wrapped in a ``Batch`` labelled ``"kedro"``, bound to the
        suite through a ``Validator``, and the resulting dataset is handed to
        the ``"action_list_operator"`` validation operator of
        ``self.expectation_context``.

        Args:
            dataset_name: Name passed to ``generate_datasource_name`` to
                obtain the ``datasource`` entry of the batch kwargs.
            dataset_path: Optional path of the data. When truthy, it is
                recorded as ``path`` and its base name without extension is
                recorded as ``data_asset_name``.
            df: The loaded data to validate.
            target_expectation_suite_name: Name of the suite to fetch from
                the expectation context.
            run_id: Run identifier forwarded to the validation operator.

        Returns:
            Whatever ``run_validation_operator`` returns.

        Raises:
            UnsupportedDataSet: If constructing the ``Validator`` raises
                ``ValueError``; that error is attached as the cause.
        """
        target_suite = self.expectation_context.get_expectation_suite(
            target_expectation_suite_name)
        batch_markers = BatchMarkers({
            "ge_load_time":
            datetime.datetime.now(
                datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
        })

        batch_kwargs = {"datasource": generate_datasource_name(dataset_name)}

        if dataset_path:
            data_asset_name, _ = os.path.splitext(
                os.path.basename(dataset_path))
            batch_kwargs["path"] = str(dataset_path)
            batch_kwargs["data_asset_name"] = data_asset_name

        batch = Batch(
            "kedro",
            batch_kwargs=BatchKwargs(batch_kwargs),
            data=df,
            batch_parameters=None,
            batch_markers=batch_markers,
            data_context=self.expectation_context,
        )

        try:
            v = Validator(
                batch=batch,
                expectation_suite=target_suite,
            )
        except ValueError as err:
            # Chain explicitly so the traceback shows the ValueError as the
            # direct cause instead of "another exception occurred".
            raise UnsupportedDataSet from err

        validator_dataset_batch = v.get_dataset()
        return self.expectation_context.run_validation_operator(
            "action_list_operator", [validator_dataset_batch], run_id=run_id)
Exemplo n.º 4
0
 def _run_suite(self, dataset, target_expectation_suite_name, run_id):
     """Load ``dataset`` and run a named expectation suite against it.

     The loaded data is wrapped in a ``Batch`` whose path and datasource are
     both ``'kedro'``, bound to the suite through a ``Validator`` configured
     with the class name returned by ``self._get_ge_class_name``, and then
     passed to the ``'action_list_operator'`` validation operator. Nothing is
     returned.

     Args:
         dataset: Object providing ``load()``; also passed to
             ``self._get_ge_class_name``.
         target_expectation_suite_name: Name of the suite to fetch from
             ``self.expectation_context``.
         run_id: Run identifier forwarded to the validation operator.
     """
     ge_class_name = self._get_ge_class_name(dataset)
     context = self.expectation_context
     suite = context.get_expectation_suite(target_expectation_suite_name)
     loaded = dataset.load()

     batch_kwargs = BatchKwargs({"path": "kedro", "datasource": "kedro"})
     loaded_at = datetime.datetime.now(datetime.timezone.utc)
     batch_markers = BatchMarkers(
         {"ge_load_time": loaded_at.strftime("%Y%m%dT%H%M%S.%fZ")})
     batch = Batch(
         "kedro", batch_kwargs, loaded, None, batch_markers, context)

     dataset_class = {
         "module_name": "great_expectations.dataset",
         "class_name": ge_class_name,
     }
     validated = Validator(batch, suite, dataset_class).get_dataset()
     context.run_validation_operator(
         "action_list_operator", [validated], run_id=run_id)
Exemplo n.º 5
0
    def __init__(
        self,
        data,
        batch_request: BatchRequest = None,
        batch_definition: BatchDefinition = None,
        batch_spec: BatchSpec = None,
        batch_markers: BatchMarkers = None,
        # The remaining parameters are for backward compatibility.
        data_context=None,
        datasource_name=None,
        batch_parameters=None,
        batch_kwargs=None,
    ):
        """Store the batch data together with the metadata describing it.

        Args:
            data: The batch payload; stored unchanged.
            batch_request: Stored as given; an empty dict when ``None``.
            batch_definition: Stored as given; an empty ``IDDict`` when
                ``None``.
            batch_spec: Stored as given; an empty ``BatchSpec`` when ``None``.
            batch_markers: Stored as given; when ``None``, markers holding
                the current UTC time under ``ge_load_time`` are created.
            data_context: Backward-compatibility parameter; stored unchanged.
            datasource_name: Backward-compatibility parameter; stored
                unchanged.
            batch_parameters: Backward-compatibility parameter; stored
                unchanged.
            batch_kwargs: Backward-compatibility parameter; any falsy value
                is replaced by an empty ``BatchKwargs``.
        """
        self._data = data
        self._batch_request = {} if batch_request is None else batch_request
        self._batch_definition = (
            IDDict() if batch_definition is None else batch_definition
        )
        self._batch_spec = BatchSpec() if batch_spec is None else batch_spec

        if batch_markers is None:
            # No markers supplied: stamp the batch with the current UTC time.
            created_at = datetime.datetime.now(datetime.timezone.utc)
            batch_markers = BatchMarkers(
                {"ge_load_time": created_at.strftime("%Y%m%dT%H%M%S.%fZ")}
            )
        self._batch_markers = batch_markers

        # Everything below is kept only for backward compatibility.
        self._data_context = data_context
        self._datasource_name = datasource_name
        self._batch_parameters = batch_parameters
        self._batch_kwargs = batch_kwargs if batch_kwargs else BatchKwargs()
Exemplo n.º 6
0
    def render(self, validation_result=None, data_docs_pages=None, notify_with=None):
        """Render a validation result as an e-mail title and HTML body.

        Args:
            validation_result: Object exposing ``meta``, ``statistics`` and
                ``success``. When it is falsy, both the title and the body
                start from a fixed "no validation occurred" message.
            data_docs_pages: Optional mapping of Data Docs site name to link.
                Each selected link is passed to ``self._get_report_element``
                and the result, when truthy, is appended to the body. With
                ``notify_with`` unset, every key except ``"class"`` is used.
            notify_with: Optional iterable of site names selecting which
                entries of ``data_docs_pages`` to link. A name missing from
                ``data_docs_pages`` is logged at CRITICAL level and an error
                notice is appended to the body instead.

        Returns:
            A ``(title, html)`` tuple of strings. The documentation footer is
            always appended to ``html``.
        """
        default_text = (
            "No validation occurred. Please ensure you passed a validation_result."
        )
        status = "Failed ❌"

        title = default_text

        html = default_text

        if validation_result:
            meta = validation_result.meta
            expectation_suite_name = meta.get(
                "expectation_suite_name", "__no_expectation_suite_name__"
            )

            if "batch_kwargs" in meta:
                data_asset_name = meta["batch_kwargs"].get(
                    "data_asset_name", "__no_data_asset_name__"
                )
            elif "active_batch_definition" in meta:
                data_asset_name = (
                    meta["active_batch_definition"].data_asset_name
                    or "__no_data_asset_name__"
                )
            else:
                data_asset_name = "__no_data_asset_name__"

            n_checks_succeeded = validation_result.statistics["successful_expectations"]
            n_checks = validation_result.statistics["evaluated_expectations"]
            run_id = meta.get("run_id", "__no_run_id__")
            batch_id = BatchKwargs(meta.get("batch_kwargs", {})).to_id()
            check_details_text = f"<strong>{n_checks_succeeded}</strong> of <strong>{n_checks}</strong> expectations were met"

            if validation_result.success:
                status = "Success 🎉"

            title = f"{expectation_suite_name}: {status}"

            # Joined line by line instead of dedenting an already-interpolated
            # template: a newline inside any value would otherwise defeat the
            # dedent and leave the template's indentation in the e-mail body.
            html = "\n".join(
                (
                    f"<p><strong>Batch Validation Status</strong>: {status}</p>",
                    f"<p><strong>Expectation suite name</strong>: {expectation_suite_name}</p>",
                    f"<p><strong>Data asset name</strong>: {data_asset_name}</p>",
                    f"<p><strong>Run ID</strong>: {run_id}</p>",
                    f"<p><strong>Batch ID</strong>: {batch_id}</p>",
                    f"<p><strong>Summary</strong>: {check_details_text}</p>",
                )
            )
            if data_docs_pages:
                if notify_with is not None:
                    for docs_link_key in notify_with:
                        if docs_link_key in data_docs_pages:
                            docs_link = data_docs_pages[docs_link_key]
                            report_element = self._get_report_element(docs_link)
                        else:
                            report_element = (
                                f"<strong>ERROR</strong>: The email is trying to provide a link to the following DataDocs: "
                                f"`{docs_link_key!s}`, but it is not configured under data_docs_sites "
                                "in the great_expectations.yml</br>"
                            )
                            logger.critical(report_element)
                        # The element is only appended to the body; the
                        # missing-site case is already reported via the logger.
                        if report_element:
                            html += report_element
                else:
                    for docs_link_key in data_docs_pages:
                        if docs_link_key == "class":
                            continue
                        docs_link = data_docs_pages[docs_link_key]
                        report_element = self._get_report_element(docs_link)
                        if report_element:
                            html += report_element

            if "result_reference" in meta:
                result_reference = meta["result_reference"]
                report_element = (
                    f"- <strong>Validation Report</strong>: {result_reference}</br>"
                )
                html += report_element

            if "dataset_reference" in meta:
                dataset_reference = meta["dataset_reference"]
                report_element = f"- <strong>Validation data asset</strong>: {dataset_reference}</br>"
                html += report_element

        documentation_url = "https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started/set_up_data_docs.html"
        footer_section = f'<p>Learn <a href="{documentation_url}">here</a> how to review validation results in Data Docs</p>'
        html += footer_section
        return title, html
Exemplo n.º 7
0
    def render(
        self,
        validation_result=None,
        data_docs_pages=None,
        notify_with=None,
    ):
        """Render a validation result as a Slack message payload.

        Args:
            validation_result: Object exposing ``meta``, ``statistics`` and
                ``success``. When it is falsy, the payload carries a fixed
                "no validation occurred" message.
            data_docs_pages: Optional mapping of Data Docs site name to link.
                Each selected link is passed to ``self._get_report_element``
                and the result, when truthy, is appended as a block. With
                ``notify_with`` unset, every key except ``"class"`` is used.
            notify_with: Optional iterable of site names selecting which
                entries of ``data_docs_pages`` to link. A name missing from
                ``data_docs_pages`` is logged at CRITICAL level and an error
                section is appended instead.

        Returns:
            A dict with ``"blocks"`` (summary section, optional link and
            reference sections, then a divider and a documentation footer)
            and a short root-level ``"text"``.
        """
        fallback_text = (
            "No validation occurred. Please ensure you passed a validation_result."
        )
        headline = {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": fallback_text,
            },
        }
        blocks = [headline]
        # The abbreviated root-level "text" shows up in the notification and
        # not in the message itself.
        payload = {"blocks": blocks, "text": fallback_text}

        if validation_result:
            meta = validation_result.meta
            suite_name = meta.get(
                "expectation_suite_name", "__no_expectation_suite_name__")

            asset_placeholder = "__no_data_asset_name__"
            if "batch_kwargs" in meta:
                asset_name = meta["batch_kwargs"].get(
                    "data_asset_name", asset_placeholder)
            elif "active_batch_definition" in meta:
                asset_name = (
                    meta["active_batch_definition"].data_asset_name
                    or asset_placeholder)
            else:
                asset_name = asset_placeholder

            stats = validation_result.statistics
            passed = stats["successful_expectations"]
            evaluated = stats["evaluated_expectations"]
            run_id = meta.get("run_id", "__no_run_id__")
            batch_id = BatchKwargs(meta.get("batch_kwargs", {})).to_id()
            outcome = (
                "Success :tada:" if validation_result.success else "Failed :x:"
            )

            headline["text"]["text"] = "\n".join((
                f"*Batch Validation Status*: {outcome}",
                f"*Expectation suite name*: `{suite_name}`",
                f"*Data asset name*: `{asset_name}`",
                f"*Run ID*: `{run_id}`",
                f"*Batch ID*: `{batch_id}`",
                f"*Summary*: *{passed}* of *{evaluated}* expectations were met",
            ))
            payload["text"] = f"{suite_name}: {outcome}"

            if data_docs_pages:
                if notify_with is None:
                    # No explicit selection: link every site except the
                    # "class" entry.
                    for site_name, site_link in data_docs_pages.items():
                        if site_name == "class":
                            continue
                        link_block = self._get_report_element(site_link)
                        if link_block:
                            blocks.append(link_block)
                else:
                    for site_name in notify_with:
                        if site_name in data_docs_pages:
                            link_block = self._get_report_element(
                                data_docs_pages[site_name])
                        else:
                            # The same text is logged and shown in Slack.
                            problem = f"*ERROR*: Slack is trying to provide a link to the following DataDocs: `{site_name!s}`, but it is not configured under `data_docs_sites` in the `great_expectations.yml`\n"
                            logger.critical(problem)
                            link_block = {
                                "type": "section",
                                "text": {"type": "mrkdwn", "text": problem},
                            }
                        if link_block:
                            blocks.append(link_block)

            if "result_reference" in meta:
                blocks.append({
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": f"- *Validation Report*: {meta['result_reference']}",
                    },
                })

            if "dataset_reference" in meta:
                blocks.append({
                    "type": "section",
                    "text": {
                        "type": "mrkdwn",
                        "text": f"- *Validation data asset*: {meta['dataset_reference']}",
                    },
                })

        docs_url = "https://docs.greatexpectations.io/en/latest/guides/tutorials/getting_started/set_up_data_docs.html"
        blocks.append({"type": "divider"})
        blocks.append({
            "type": "context",
            "elements": [{
                "type": "mrkdwn",
                "text": f"Learn how to review validation results in Data Docs: {docs_url}",
            }],
        })
        return payload