Example #1
def test_ExpectationsStore_with_DatabaseStoreBackend():
    # Use sqlite so we don't require postgres for this test.
    connection_kwargs = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ExpectationsStore(store_backend={
        "class_name": "DatabaseStoreBackend",
        "credentials": connection_kwargs
    })

    with pytest.raises(TypeError):
        my_store.get("not_a_ExpectationSuiteIdentifier")

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    my_store.set(ns_1,
                 ExpectationSuite(expectation_suite_name="a.b.c.warning"))
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning")

    ns_2 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))

    my_store.set(ns_2,
                 ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert my_store.get(ns_2) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure")

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
Example #2
def test_ExpectationsStore_with_DatabaseStoreBackend(sa):
    # Use sqlite so we don't require postgres for this test.
    connection_kwargs = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ExpectationsStore(store_backend={
        "class_name": "DatabaseStoreBackend",
        "credentials": connection_kwargs,
    })
    with pytest.raises(TypeError):
        my_store.get("not_a_ExpectationSuiteIdentifier")

    # first suite to add to db
    default_suite = ExpectationSuite(
        expectation_suite_name="a.b.c.warning",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    # initial set and check if first suite exists
    my_store.set(ns_1, default_suite)
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    # update suite and check if new value exists
    updated_suite = ExpectationSuite(
        expectation_suite_name="a.b.c.warning",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )
    my_store.set(ns_1, updated_suite)
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )

    ns_2 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    my_store.set(ns_2,
                 ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert my_store.get(ns_2) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure")

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
Example #3
    def _add_expectations_to_index_links(self, index_links_dict: OrderedDict,
                                         skip_and_clean_missing: bool) -> None:
        expectations = self.site_section_builders_config.get(
            "expectations", "None")
        if expectations and expectations not in FALSEY_YAML_STRINGS:
            expectation_suite_source_keys = self.data_context.stores[
                self.site_section_builders_config["expectations"].get(
                    "source_store_name")].list_keys()
            expectation_suite_site_keys = [
                ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple)
                for expectation_suite_tuple in self.target_store.
                store_backends[ExpectationSuiteIdentifier].list_keys()
            ]
            if skip_and_clean_missing:
                cleaned_keys = []
                for expectation_suite_site_key in expectation_suite_site_keys:
                    if expectation_suite_site_key not in expectation_suite_source_keys:
                        self.target_store.store_backends[
                            ExpectationSuiteIdentifier].remove_key(
                                expectation_suite_site_key)
                    else:
                        cleaned_keys.append(expectation_suite_site_key)
                expectation_suite_site_keys = cleaned_keys

            for expectation_suite_key in expectation_suite_site_keys:
                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=expectation_suite_key.
                    expectation_suite_name,
                    section_name="expectations",
                )
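The pattern above, which recurs throughout these examples, treats the literal string "None" as the default for a missing site-section entry and then filters it through FALSEY_YAML_STRINGS. A minimal sketch of that check follows; the value assigned to FALSEY_YAML_STRINGS here is illustrative only, not the library's definition:

FALSEY_YAML_STRINGS = ["0", "None", "False"]  # assumed illustrative value, not the library's definition

def section_enabled(site_section_builders_config: dict, name: str) -> bool:
    # A missing key defaults to the string "None", which counts as "section disabled".
    value = site_section_builders_config.get(name, "None")
    return bool(value) and value not in FALSEY_YAML_STRINGS

# Enabled: the section config is a dict naming its source store.
print(section_enabled({"expectations": {"source_store_name": "expectations_store"}}, "expectations"))  # True
# Disabled: key absent, or explicitly set to a falsey YAML string.
print(section_enabled({}, "expectations"))  # False
print(section_enabled({"expectations": "False"}, "expectations"))  # False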
Example #4
 def from_tuple(cls, tuple_):
     if len(tuple_) < 4:
         raise GreatExpectationsError(
             "ValidationMetricIdentifier tuple must have at least four components."
         )
     return cls(
         run_id=tuple_[0],
         expectation_suite_identifier=ExpectationSuiteIdentifier.from_tuple(
             tuple_[1:-2]),
         metric_name=tuple_[-2],
         metric_kwargs_id=tuple_[-1])
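A minimal sketch (with made-up values; only the positions matter) of the tuple layout this four-or-more-component form assumes: the first element is the run id, the last two are the metric name and metric kwargs id, and everything in between is handed to ExpectationSuiteIdentifier.from_tuple:

example_tuple = (
    "20210101T000000.000000Z",   # tuple_[0]    -> run_id
    "a", "b", "c", "warning",    # tuple_[1:-2] -> expectation suite name components
    "row_count",                 # tuple_[-2]   -> metric_name (illustrative)
    "abc123",                    # tuple_[-1]   -> metric_kwargs_id (illustrative)
)
assert len(example_tuple) >= 4
suite_name_parts = example_tuple[1:-2]   # ("a", "b", "c", "warning")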
Example #5
def test_expectations_store():
    my_store = ExpectationsStore()

    with pytest.raises(TypeError):
        my_store.set("not_a_ValidationResultIdentifier")

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    my_store.set(ns_1,
                 ExpectationSuite(expectation_suite_name="a.b.c.warning"))
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning")

    ns_2 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    my_store.set(ns_2,
                 ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert my_store.get(ns_2) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure")

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
Example #6
def test_expectations_store(empty_data_context):
    context: DataContext = empty_data_context
    my_store = ExpectationsStore()

    with pytest.raises(TypeError):
        my_store.set("not_a_ValidationResultIdentifier")

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    my_store.set(
        ns_1,
        ExpectationSuite(expectation_suite_name="a.b.c.warning",
                         data_context=context),
    )

    ns_1_dict: dict = my_store.get(ns_1)
    ns_1_suite: ExpectationSuite = ExpectationSuite(**ns_1_dict,
                                                    data_context=context)
    assert ns_1_suite == ExpectationSuite(
        expectation_suite_name="a.b.c.warning", data_context=context)

    ns_2 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    my_store.set(
        ns_2,
        ExpectationSuite(expectation_suite_name="a.b.c.failure",
                         data_context=context),
    )
    ns_2_dict: dict = my_store.get(ns_2)
    ns_2_suite: ExpectationSuite = ExpectationSuite(**ns_2_dict,
                                                    data_context=context)
    assert ns_2_suite == ExpectationSuite(
        expectation_suite_name="a.b.c.failure", data_context=context)

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
Example #7
def test_ExpectationsStore_with_DatabaseStoreBackend_postgres(caplog):
    connection_kwargs = {
        "drivername": "postgresql",
        "username": "******",
        "password": "",
        "host": "localhost",
        "port": "5432",
        "database": "test_ci",
    }

    # First, demonstrate that we pick up default configuration
    my_store = ExpectationsStore(store_backend={
        "class_name": "DatabaseStoreBackend",
        "credentials": connection_kwargs,
    })
    with pytest.raises(TypeError):
        my_store.get("not_a_ExpectationSuiteIdentifier")

    # first suite to add to db
    default_suite = ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c"))
    # initial set and check if first suite exists
    my_store.set(ns_1, default_suite)
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    # update suite and check if new value exists
    updated_suite = ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )
    my_store.set(ns_1, updated_suite)
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )
Example #8
 def from_tuple(cls, tuple_):
     if len(tuple_) < 6:
         raise GreatExpectationsError(
             "ValidationMetricIdentifier tuple must have at least six components."
         )
     if tuple_[2] == "__":
         tuple_data_asset_name = None
     else:
         tuple_data_asset_name = tuple_[2]
     metric_id = MetricIdentifier.from_tuple(tuple_[-2:])
     return cls(
         run_id=RunIdentifier.from_tuple((tuple_[0], tuple_[1])),
         data_asset_name=tuple_data_asset_name,
         expectation_suite_identifier=ExpectationSuiteIdentifier.from_tuple(
             tuple_[3:-2]),
         metric_name=metric_id.metric_name,
         metric_kwargs_id=metric_id.metric_kwargs_id,
     )
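For contrast with Example #4, a companion sketch (again with made-up values) of the six-or-more-component layout this variant slices: the run id is split into a name/time pair, position 2 holds the data asset name (or the "__" placeholder meaning None), and the final two positions feed MetricIdentifier.from_tuple:

example_tuple = (
    "my_run",                    # tuple_[0]    -> run name
    "20210101T000000.000000Z",   # tuple_[1]    -> run time
    "__",                        # tuple_[2]    -> data asset name placeholder (becomes None)
    "a", "b", "warning",         # tuple_[3:-2] -> expectation suite name components
    "row_count",                 # tuple_[-2:]  -> metric name and metric kwargs id,
    "abc123",                    #                 consumed by MetricIdentifier.from_tuple
)
assert len(example_tuple) >= 6
data_asset_name = None if example_tuple[2] == "__" else example_tuple[2]
suite_name_parts = example_tuple[3:-2]   # ("a", "b", "warning")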
Example #9
    def build(self):
        # Loop over sections in the HtmlStore
        logger.debug("DefaultSiteIndexBuilder.build")

        expectation_suite_keys = [
            ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple)
            for expectation_suite_tuple in self.target_store.store_backends[
                ExpectationSuiteIdentifier
            ].list_keys()
        ]
        validation_and_profiling_result_keys = [
            ValidationResultIdentifier.from_tuple(validation_result_tuple)
            for validation_result_tuple in self.target_store.store_backends[
                ValidationResultIdentifier
            ].list_keys()
        ]
        profiling_result_keys = [
            validation_result_key
            for validation_result_key in validation_and_profiling_result_keys
            if validation_result_key.run_id.run_name == "profiling"
        ]
        validation_result_keys = [
            validation_result_key
            for validation_result_key in validation_and_profiling_result_keys
            if validation_result_key.run_id.run_name != "profiling"
        ]
        validation_result_keys = sorted(
            validation_result_keys, key=lambda x: x.run_id.run_time, reverse=True
        )
        if self.validation_results_limit:
            validation_result_keys = validation_result_keys[
                : self.validation_results_limit
            ]

        index_links_dict = OrderedDict()
        index_links_dict["site_name"] = self.site_name

        if self.show_how_to_buttons:
            index_links_dict["cta_object"] = self.get_calls_to_action()

        for expectation_suite_key in expectation_suite_keys:
            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=expectation_suite_key.expectation_suite_name,
                section_name="expectations",
            )

        for profiling_result_key in profiling_result_keys:
            try:
                validation = self.data_context.get_validation_result(
                    batch_identifier=profiling_result_key.batch_identifier,
                    expectation_suite_name=profiling_result_key.expectation_suite_identifier.expectation_suite_name,
                    run_id=profiling_result_key.run_id,
                    validations_store_name=self.source_stores.get("profiling"),
                )

                batch_kwargs = validation.meta.get("batch_kwargs", {})

                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=profiling_result_key.expectation_suite_identifier.expectation_suite_name,
                    section_name="profiling",
                    batch_identifier=profiling_result_key.batch_identifier,
                    run_id=profiling_result_key.run_id,
                    run_time=profiling_result_key.run_id.run_time,
                    run_name=profiling_result_key.run_id.run_name,
                    asset_name=batch_kwargs.get("data_asset_name"),
                    batch_kwargs=batch_kwargs,
                )
            except Exception:
                error_msg = "Profiling result not found: {0:s} - skipping".format(
                    str(profiling_result_key.to_tuple())
                )
                logger.warning(error_msg)

        for validation_result_key in validation_result_keys:
            try:
                validation = self.data_context.get_validation_result(
                    batch_identifier=validation_result_key.batch_identifier,
                    expectation_suite_name=validation_result_key.expectation_suite_identifier.expectation_suite_name,
                    run_id=validation_result_key.run_id,
                    validations_store_name=self.source_stores.get("validations"),
                )

                validation_success = validation.success
                batch_kwargs = validation.meta.get("batch_kwargs", {})

                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=validation_result_key.expectation_suite_identifier.expectation_suite_name,
                    section_name="validations",
                    batch_identifier=validation_result_key.batch_identifier,
                    run_id=validation_result_key.run_id,
                    validation_success=validation_success,
                    run_time=validation_result_key.run_id.run_time,
                    run_name=validation_result_key.run_id.run_name,
                    asset_name=batch_kwargs.get("data_asset_name"),
                    batch_kwargs=batch_kwargs,
                )
            except Exception:
                error_msg = "Validation result not found: {0:s} - skipping".format(
                    str(validation_result_key.to_tuple())
                )
                logger.warning(error_msg)

        try:
            rendered_content = self.renderer_class.render(index_links_dict)
            viewable_content = self.view_class.render(
                rendered_content,
                data_context_id=self.data_context_id,
                show_how_to_buttons=self.show_how_to_buttons,
            )
        except Exception as e:
            exception_message = f"""\
An unexpected Exception occurred during data docs rendering.  Because of this error, certain parts of data docs will \
not be rendered properly and/or may not appear altogether.  Please use the trace, included in this message, to \
diagnose and repair the underlying issue.  Detailed information follows:
            """
            exception_traceback = traceback.format_exc()
            exception_message += (
                f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
            )
            logger.error(exception_message, e, exc_info=True)

        return (self.target_store.write_index_page(viewable_content), index_links_dict)
Example #10
    def build(self, skip_and_clean_missing=True, build_index: bool = True):
        """
        :param skip_and_clean_missing: if True, target html store keys without corresponding source store keys will
        be skipped and removed from the target store
        :param build_index: a flag if False, skips building the index page
        :return: tuple(index_page_url, index_links_dict)
        """

        # Loop over sections in the HtmlStore
        logger.debug("DefaultSiteIndexBuilder.build")
        if not build_index:
            logger.debug("Skipping index rendering")
            return None, None

        index_links_dict = OrderedDict()
        index_links_dict["site_name"] = self.site_name

        if self.show_how_to_buttons:
            index_links_dict["cta_object"] = self.get_calls_to_action()

        if (
                # TODO why is this duplicated?
                self.site_section_builders_config.get("expectations", "None")
                and self.site_section_builders_config.get(
                    "expectations", "None") not in FALSEY_YAML_STRINGS):
            expectation_suite_source_keys = self.data_context.stores[
                self.site_section_builders_config["expectations"].get(
                    "source_store_name")].list_keys()
            expectation_suite_site_keys = [
                ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple)
                for expectation_suite_tuple in self.target_store.
                store_backends[ExpectationSuiteIdentifier].list_keys()
            ]
            if skip_and_clean_missing:
                cleaned_keys = []
                for expectation_suite_site_key in expectation_suite_site_keys:
                    if expectation_suite_site_key not in expectation_suite_source_keys:
                        self.target_store.store_backends[
                            ExpectationSuiteIdentifier].remove_key(
                                expectation_suite_site_key)
                    else:
                        cleaned_keys.append(expectation_suite_site_key)
                expectation_suite_site_keys = cleaned_keys

            for expectation_suite_key in expectation_suite_site_keys:
                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=expectation_suite_key.
                    expectation_suite_name,
                    section_name="expectations",
                )

        validation_and_profiling_result_site_keys = []
        if (
                # TODO why is this duplicated?
                self.site_section_builders_config.get("validations", "None")
                and self.site_section_builders_config.get(
                    "validations", "None") not in FALSEY_YAML_STRINGS
                or self.site_section_builders_config.get("profiling", "None")
                and self.site_section_builders_config.get(
                    "profiling", "None") not in FALSEY_YAML_STRINGS):
            source_store = (
                "validations"
                # TODO why is this duplicated?
                if
                self.site_section_builders_config.get("validations", "None")
                and self.site_section_builders_config.get(
                    "validations", "None") not in FALSEY_YAML_STRINGS else
                "profiling")
            validation_and_profiling_result_source_keys = self.data_context.stores[
                self.site_section_builders_config[source_store].get(
                    "source_store_name")].list_keys()
            validation_and_profiling_result_site_keys = [
                ValidationResultIdentifier.from_tuple(validation_result_tuple)
                for validation_result_tuple in self.target_store.
                store_backends[ValidationResultIdentifier].list_keys()
            ]
            if skip_and_clean_missing:
                cleaned_keys = []
                for validation_result_site_key in validation_and_profiling_result_site_keys:
                    if (validation_result_site_key not in
                            validation_and_profiling_result_source_keys):
                        self.target_store.store_backends[
                            ValidationResultIdentifier].remove_key(
                                validation_result_site_key)
                    else:
                        cleaned_keys.append(validation_result_site_key)
                validation_and_profiling_result_site_keys = cleaned_keys

        if (
                # TODO why is this duplicated?
                self.site_section_builders_config.get("profiling", "None")
                and self.site_section_builders_config.get(
                    "profiling", "None") not in FALSEY_YAML_STRINGS):
            profiling_run_name_filter = self.site_section_builders_config[
                "profiling"]["run_name_filter"]
            profiling_result_site_keys = [
                validation_result_key for validation_result_key in
                validation_and_profiling_result_site_keys
                if resource_key_passes_run_name_filter(
                    validation_result_key, profiling_run_name_filter)
            ]
            for profiling_result_key in profiling_result_site_keys:
                try:
                    validation = self.data_context.get_validation_result(
                        batch_identifier=profiling_result_key.batch_identifier,
                        expectation_suite_name=profiling_result_key.
                        expectation_suite_identifier.expectation_suite_name,
                        run_id=profiling_result_key.run_id,
                        validations_store_name=self.source_stores.get(
                            "profiling"),
                    )

                    batch_kwargs = validation.meta.get("batch_kwargs", {})
                    batch_spec = validation.meta.get("batch_spec", {})

                    self.add_resource_info_to_index_links_dict(
                        index_links_dict=index_links_dict,
                        expectation_suite_name=profiling_result_key.
                        expectation_suite_identifier.expectation_suite_name,
                        section_name="profiling",
                        batch_identifier=profiling_result_key.batch_identifier,
                        run_id=profiling_result_key.run_id,
                        run_time=profiling_result_key.run_id.run_time,
                        run_name=profiling_result_key.run_id.run_name,
                        asset_name=batch_kwargs.get("data_asset_name")
                        or batch_spec.get("data_asset_name"),
                        batch_kwargs=batch_kwargs,
                        batch_spec=batch_spec,
                    )
                except Exception:
                    error_msg = "Profiling result not found: {:s} - skipping".format(
                        str(profiling_result_key.to_tuple()))
                    logger.warning(error_msg)

        if (
                # TODO why is this duplicated?
                self.site_section_builders_config.get("validations", "None")
                and self.site_section_builders_config.get(
                    "validations", "None") not in FALSEY_YAML_STRINGS):
            validations_run_name_filter = self.site_section_builders_config[
                "validations"]["run_name_filter"]
            validation_result_site_keys = [
                validation_result_key for validation_result_key in
                validation_and_profiling_result_site_keys
                if resource_key_passes_run_name_filter(
                    validation_result_key, validations_run_name_filter)
            ]
            validation_result_site_keys = sorted(
                validation_result_site_keys,
                key=lambda x: x.run_id.run_time,
                reverse=True,
            )
            if self.validation_results_limit:
                validation_result_site_keys = validation_result_site_keys[
                    : self.validation_results_limit
                ]
            for validation_result_key in validation_result_site_keys:
                try:
                    validation = self.data_context.get_validation_result(
                        batch_identifier=validation_result_key.
                        batch_identifier,
                        expectation_suite_name=validation_result_key.
                        expectation_suite_identifier.expectation_suite_name,
                        run_id=validation_result_key.run_id,
                        validations_store_name=self.source_stores.get(
                            "validations"),
                    )

                    validation_success = validation.success
                    batch_kwargs = validation.meta.get("batch_kwargs", {})
                    batch_spec = validation.meta.get("batch_spec", {})

                    self.add_resource_info_to_index_links_dict(
                        index_links_dict=index_links_dict,
                        expectation_suite_name=validation_result_key.
                        expectation_suite_identifier.expectation_suite_name,
                        section_name="validations",
                        batch_identifier=validation_result_key.
                        batch_identifier,
                        run_id=validation_result_key.run_id,
                        validation_success=validation_success,
                        run_time=validation_result_key.run_id.run_time,
                        run_name=validation_result_key.run_id.run_name,
                        asset_name=batch_kwargs.get("data_asset_name")
                        or batch_spec.get("data_asset_name"),
                        batch_kwargs=batch_kwargs,
                        batch_spec=batch_spec,
                    )
                except Exception:
                    error_msg = "Validation result not found: {:s} - skipping".format(
                        str(validation_result_key.to_tuple()))
                    logger.warning(error_msg)

        try:
            rendered_content = self.renderer_class.render(index_links_dict)
            viewable_content = self.view_class.render(
                rendered_content,
                data_context_id=self.data_context_id,
                show_how_to_buttons=self.show_how_to_buttons,
            )
        except Exception as e:
            exception_message = f"""\
An unexpected Exception occurred during data docs rendering.  Because of this error, certain parts of data docs will \
not be rendered properly and/or may not appear altogether.  Please use the trace, included in this message, to \
diagnose and repair the underlying issue.  Detailed information follows:
            """
            exception_traceback = traceback.format_exc()
            exception_message += (
                f'{type(e).__name__}: "{str(e)}".  Traceback: "{exception_traceback}".'
            )
            logger.error(exception_message)

        return (self.target_store.write_index_page(viewable_content),
                index_links_dict)
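A brief usage sketch for the two flags documented in the build() docstring above, reusing the site_builder constructed in the test that follows; this only illustrates the return values visible in the code above, not additional library behavior:

# With build_index=False the method returns early with (None, None).
assert site_builder.build(build_index=False) == (None, None)

# Default path: prune stale HTML keys, then return the index page URL and the index links dict.
index_page_url, index_links_dict = site_builder.build(skip_and_clean_missing=True)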
Example #11
def test_configuration_driven_site_builder_skip_and_clean_missing(
    site_builder_data_context_with_html_store_titanic_random,
):
    # tests auto-cleaning functionality of DefaultSiteIndexBuilder
    # when index page is built, if an HTML page is present without corresponding suite or validation result,
    # the HTML page should be removed and not appear on index page
    context = site_builder_data_context_with_html_store_titanic_random

    context.add_validation_operator(
        "validate_and_store",
        {
            "class_name": "ActionListValidationOperator",
            "action_list": [
                {
                    "name": "store_validation_result",
                    "action": {
                        "class_name": "StoreValidationResultAction",
                        "target_store_name": "validations_store",
                    },
                },
                {
                    "name": "extract_and_store_eval_parameters",
                    "action": {
                        "class_name": "StoreEvaluationParametersAction",
                        "target_store_name": "evaluation_parameter_store",
                    },
                },
            ],
        },
    )

    # profiling the Titanic datasource will generate one expectation suite and one validation
    # that is a profiling result
    datasource_name = "titanic"
    data_asset_name = "Titanic"
    profiler_name = "BasicDatasetProfiler"
    generator_name = "subdir_reader"
    context.profile_datasource(datasource_name)

    # creating another validation result using the profiler's suite (no need to use a new expectation suite
    # for this test). having two validation results - one with run id "profiling" - allows us to test
    # the logic of run_name_filter that helps filtering validation results to be included in
    # the profiling and the validation sections.
    batch_kwargs = context.build_batch_kwargs(
        datasource=datasource_name,
        batch_kwargs_generator=generator_name,
        data_asset_name=data_asset_name,
    )

    expectation_suite_name = "{}.{}.{}.{}".format(
        datasource_name, generator_name, data_asset_name, profiler_name
    )

    batch = context.get_batch(
        batch_kwargs=batch_kwargs,
        expectation_suite_name=expectation_suite_name,
    )
    run_id = RunIdentifier(run_name="test_run_id_12345")
    context.run_validation_operator(
        assets_to_validate=[batch],
        run_id=run_id,
        validation_operator_name="validate_and_store",
    )

    data_docs_config = context._project_config.data_docs_sites
    local_site_config = data_docs_config["local_site"]

    validations_set = set(context.stores["validations_store"].list_keys())
    assert len(validations_set) == 6

    expectation_suite_set = set(context.stores["expectations_store"].list_keys())
    assert len(expectation_suite_set) == 5

    site_builder = SiteBuilder(
        data_context=context,
        runtime_environment={"root_directory": context.root_directory},
        **local_site_config
    )
    site_builder.build()

    # test expectation suite pages
    expectation_suite_html_pages = {
        ExpectationSuiteIdentifier.from_tuple(suite_tuple)
        for suite_tuple in site_builder.target_store.store_backends[
            ExpectationSuiteIdentifier
        ].list_keys()
    }
    # suites in expectations store should match html pages
    assert expectation_suite_set == expectation_suite_html_pages

    # remove suites from expectations store
    for i in range(2):
        context.stores["expectations_store"].remove_key(list(expectation_suite_set)[i])

    # re-build data docs, which should remove suite HTML pages that no longer have corresponding suite in
    # expectations store
    site_builder.build()

    expectation_suite_set = set(context.stores["expectations_store"].list_keys())
    expectation_suite_html_pages = {
        ExpectationSuiteIdentifier.from_tuple(suite_tuple)
        for suite_tuple in site_builder.target_store.store_backends[
            ExpectationSuiteIdentifier
        ].list_keys()
    }
    assert expectation_suite_set == expectation_suite_html_pages

    # test validation result pages
    validation_html_pages = {
        ValidationResultIdentifier.from_tuple(result_tuple)
        for result_tuple in site_builder.target_store.store_backends[
            ValidationResultIdentifier
        ].list_keys()
    }
    # validations in store should match html pages
    assert validations_set == validation_html_pages

    # remove validations from store
    for i in range(2):
        context.stores["validations_store"].store_backend.remove_key(
            list(validations_set)[i]
        )

    # re-build data docs, which should remove validation HTML pages that no longer have corresponding validation in
    # validations store
    site_builder.build()

    validations_set = set(context.stores["validations_store"].list_keys())
    validation_html_pages = {
        ValidationResultIdentifier.from_tuple(result_tuple)
        for result_tuple in site_builder.target_store.store_backends[
            ValidationResultIdentifier
        ].list_keys()
    }
    assert validations_set == validation_html_pages
Example #12
    def build(self):
        # Loop over sections in the HtmlStore
        logger.debug("DefaultSiteIndexBuilder.build")

        expectation_suite_keys = [
            ExpectationSuiteIdentifier.from_tuple(expectation_suite_tuple) for expectation_suite_tuple in
            self.target_store.store_backends[ExpectationSuiteIdentifier].list_keys()
        ]
        validation_and_profiling_result_keys = [
            ValidationResultIdentifier.from_tuple(validation_result_tuple) for validation_result_tuple in
            self.target_store.store_backends[ValidationResultIdentifier].list_keys()
        ]
        profiling_result_keys = [
            validation_result_key for validation_result_key in validation_and_profiling_result_keys
            if validation_result_key.run_id == "profiling"
        ]
        validation_result_keys = [
            validation_result_key for validation_result_key in validation_and_profiling_result_keys
            if validation_result_key.run_id != "profiling"
        ]
        validation_result_keys = sorted(validation_result_keys, key=lambda x: x.run_id, reverse=True)
        if self.validation_results_limit:
            validation_result_keys = validation_result_keys[:self.validation_results_limit]

        index_links_dict = OrderedDict()
        index_links_dict["site_name"] = self.site_name

        if self.show_cta_footer:
            index_links_dict["cta_object"] = self.get_calls_to_action()

        for expectation_suite_key in expectation_suite_keys:
            self.add_resource_info_to_index_links_dict(
                index_links_dict=index_links_dict,
                expectation_suite_name=expectation_suite_key.expectation_suite_name,
                section_name="expectations"
            )

        for profiling_result_key in profiling_result_keys:
            try:
                validation = self.data_context.get_validation_result(
                    batch_identifier=profiling_result_key.batch_identifier,
                    expectation_suite_name=profiling_result_key.expectation_suite_identifier.expectation_suite_name,
                    run_id=profiling_result_key.run_id
                )

                validation_success = validation.success

                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=profiling_result_key.expectation_suite_identifier.expectation_suite_name,
                    section_name="profiling",
                    batch_identifier=profiling_result_key.batch_identifier,
                    run_id=profiling_result_key.run_id,
                    validation_success=validation_success
                )
            except Exception as e:
                error_msg = "Profiling result not found: {0:s} - skipping".format(str(profiling_result_key.to_tuple()))
                logger.warning(error_msg)

        for validation_result_key in validation_result_keys:
            try:
                validation = self.data_context.get_validation_result(
                    batch_identifier=validation_result_key.batch_identifier,
                    expectation_suite_name=validation_result_key.expectation_suite_identifier.expectation_suite_name,
                    run_id=validation_result_key.run_id
                )

                validation_success = validation.success

                self.add_resource_info_to_index_links_dict(
                    index_links_dict=index_links_dict,
                    expectation_suite_name=validation_result_key.expectation_suite_identifier.expectation_suite_name,
                    section_name="validations",
                    batch_identifier=validation_result_key.batch_identifier,
                    run_id=validation_result_key.run_id,
                    validation_success=validation_success
                )
            except Exception as e:
                error_msg = "Validation result not found: {0:s} - skipping".format(str(validation_result_key.to_tuple()))
                logger.warning(error_msg)

        try:
            rendered_content = self.renderer_class.render(index_links_dict)
            viewable_content = self.view_class.render(rendered_content)
        except Exception as e:
            logger.error("Exception occurred during data docs rendering: ", e, exc_info=True)
            return None

        return (
            self.target_store.write_index_page(viewable_content),
            index_links_dict
        )