示例#1
0
def test__find_all_evrs_by_type(titanic_profiled_evrs_1):
    """Check ``Renderer._find_all_evrs_by_type`` with and without a column filter."""

    def lookup(expectation_type, column):
        # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite,
        # not ValidationResultSuite.results
        matches = Renderer()._find_all_evrs_by_type(
            titanic_profiled_evrs_1.results,
            expectation_type,
            column_=column,
        )
        print(matches)
        return matches

    # The fixture is expected to hold no "expect_column_to_exist" results,
    # whether or not a column filter is supplied.
    assert lookup("expect_column_to_exist", None) == []
    assert lookup("expect_column_to_exist", "SexCode") == []

    # Four distinct-values results are expected overall, one of them for "SexCode".
    assert len(lookup("expect_column_distinct_values_to_be_in_set", None)) == 4
    assert len(lookup("expect_column_distinct_values_to_be_in_set", "SexCode")) == 1
示例#2
0
def test__find_evr_by_type(titanic_profiled_evrs_1):
    """Check that ``Renderer._find_evr_by_type`` yields None or the expected EVR."""
    # TODO: _find_evr_by_type should accept a ValidationResultSuite,
    # not ValidationResultSuite.results
    missing = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1.results, "expect_column_to_exist"
    )
    print(missing)
    assert missing is None

    distinct_values_type = "expect_column_distinct_values_to_be_in_set"
    match = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1.results, distinct_values_type
    )
    print(match)

    # Build the expected result piece by piece so each part is easy to read.
    expected_value_counts = [
        {"value": value, "count": count}
        for value, count in (("*", 1), ("1st", 322), ("2nd", 279), ("3rd", 711))
    ]
    expected_result = {
        "observed_value": ["*", "1st", "2nd", "3rd"],
        "element_count": 1313,
        "missing_count": 0,
        "missing_percent": 0.0,
        "details": {"value_counts": expected_value_counts},
    }
    expected_exception_info = {
        "raised_exception": False,
        "exception_message": None,
        "exception_traceback": None,
    }
    expected_config = ExpectationConfiguration(
        expectation_type=distinct_values_type,
        kwargs={"column": "PClass", "value_set": None, "result_format": "SUMMARY"},
    )
    expected_evr = ExpectationValidationResult(
        success=True,
        result=expected_result,
        exception_info=expected_exception_info,
        expectation_config=expected_config,
    )
    assert match == expected_evr
示例#3
0
def test_render():
    """Check the exceptions raised by ``Renderer.render`` for several call shapes."""
    # A call without any arguments is expected to raise NotImplementedError.
    with pytest.raises(NotImplementedError):
        Renderer().render(**{})

    # A single positional argument is expected to be rejected with TypeError.
    for positional_argument in ({}, "wowza"):
        with pytest.raises(TypeError):
            # noinspection PyArgumentList
            Renderer().render(positional_argument)
示例#4
0
def test__find_evr_by_type(titanic_profiled_evrs_1):
    """Check that ``Renderer._find_evr_by_type`` yields None or the expected EVR dict."""
    # TODO: _find_evr_by_type should accept a ValidationResultSuite,
    # not ValidationResultSuite["results"]
    found_evr = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1["results"], "expect_column_to_exist")
    print(found_evr)
    # Identity check: "== None" can be fooled by a custom __eq__ (PEP 8).
    assert found_evr is None

    # TODO: _find_evr_by_type should accept a ValidationResultSuite,
    # not ValidationResultSuite["results"]
    found_evr = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1["results"],
        "expect_column_distinct_values_to_be_in_set")
    print(found_evr)
    assert found_evr == {
        'success': True,
        'result': {
            'observed_value': ['*', '1st', '2nd', '3rd'],
            'element_count': 1313,
            'missing_count': 0,
            'missing_percent': 0.0,
            'details': {
                'value_counts': [
                    {'value': '*', 'count': 1},
                    {'value': '1st', 'count': 322},
                    {'value': '2nd', 'count': 279},
                    {'value': '3rd', 'count': 711},
                ],
            },
        },
        'exception_info': {
            'raised_exception': False,
            'exception_message': None,
            'exception_traceback': None,
        },
        'expectation_config': {
            'expectation_type': 'expect_column_distinct_values_to_be_in_set',
            'kwargs': {
                'column': 'PClass',
                'value_set': None,
                'result_format': 'SUMMARY',
            },
        },
    }
示例#5
0
def titanic_profiled_name_column_evrs():
    """Return the EVRs grouped under the "Name" column of the Titanic profiling fixture.

    The fixture path is relative to the current working directory.
    """
    # This is a janky way to fetch expectations matching a specific name from an EVR suite.
    # TODO: It will no longer be necessary once we implement ValidationResultSuite._group_evrs_by_column
    from great_expectations.render.renderer.renderer import Renderer

    # Use a context manager so the fixture file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("./tests/render/fixtures/BasicDatasetProfiler_evrs.json") as infile:
        titanic_profiled_evrs_1 = json.load(infile)

    evrs_by_column = Renderer()._group_evrs_by_column(titanic_profiled_evrs_1)
    print(evrs_by_column.keys())

    name_column_evrs = evrs_by_column["Name"]
    print(json.dumps(name_column_evrs, indent=2))

    return name_column_evrs
示例#6
0
def test__get_column_list_from_evrs(titanic_profiled_evrs_1):
    """Check the column list that ``Renderer`` extracts from the Titanic EVRs."""
    expected_columns = [
        'Unnamed: 0',
        'Name',
        'PClass',
        'Age',
        'Sex',
        'Survived',
        'SexCode',
    ]
    observed_columns = Renderer()._get_column_list_from_evrs(
        titanic_profiled_evrs_1)
    print(observed_columns)
    # List equality, so the order of the columns matters as well.
    assert observed_columns == expected_columns
示例#7
0
def test__get_column_list_from_evrs(titanic_profiled_evrs_1):
    """Verify ``Renderer._get_column_list_from_evrs`` against the Titanic fixture."""
    renderer = Renderer()
    columns_found = renderer._get_column_list_from_evrs(titanic_profiled_evrs_1)
    print(columns_found)

    # Compared as a list, so both membership and order are checked.
    columns_wanted = ["Unnamed: 0", "Name", "PClass", "Age", "Sex", "Survived", "SexCode"]
    assert columns_found == columns_wanted
def test_ProfilingResultsColumnSectionRenderer_render(
        titanic_profiled_evrs_1, titanic_profiled_name_column_evrs):
    """Smoke-test ``ProfilingResultsColumnSectionRenderer.render`` on two Titanic columns."""
    # Smoke test for the Titanic "Name" column
    name_document = ProfilingResultsColumnSectionRenderer().render(
        titanic_profiled_name_column_evrs)
    print(name_document)
    assert name_document != {}

    # Smoke test for the Titanic "Age" column
    #
    # This is a janky way to fetch expectations matching a specific name from an EVR suite.
    # TODO: It will no longer be necessary once we implement ValidationResultSuite._group_evrs_by_column
    from great_expectations.render.renderer.renderer import Renderer

    grouped_evrs = Renderer()._group_evrs_by_column(titanic_profiled_evrs_1)
    print(grouped_evrs.keys())

    age_evrs = grouped_evrs["Age"]
    print(json.dumps(age_evrs, indent=2))

    # No assertion here: this part only checks that rendering does not raise.
    age_document = ProfilingResultsColumnSectionRenderer().render(age_evrs)
    print(age_document)
示例#9
0
def titanic_profiled_name_column_expectations():
    """Return the expectations grouped under the "Name" column of the Titanic fixture.

    The fixture path is relative to the current working directory.
    """
    from great_expectations.render.renderer.renderer import Renderer

    # Use a context manager so the fixture file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("./tests/render/fixtures/BasicDatasetProfiler_expectations.json") as infile:
        titanic_profiled_expectations = json.load(infile)

    columns, ordered_columns = Renderer()._group_and_order_expectations_by_column(
        titanic_profiled_expectations)
    print(columns)
    print(ordered_columns)

    name_column_expectations = columns["Name"]
    print(json.dumps(name_column_expectations, indent=2))

    return name_column_expectations
示例#10
0
def titanic_profiled_name_column_evrs():
    """Return the EVRs grouped under the "Name" column of the Titanic profiling fixture.

    The fixture JSON is located relative to this file and deserialized through
    ``expectationSuiteValidationResultSchema`` before grouping.
    """
    # This is a janky way to fetch expectations matching a specific name from an EVR suite.
    # TODO: It will no longer be necessary once we implement ValidationResultSuite._group_evrs_by_column
    from great_expectations.render.renderer.renderer import Renderer

    fixture_path = file_relative_path(
        __file__, "./fixtures/BasicDatasetProfiler_evrs.json")
    with open(fixture_path) as fixture_file:
        raw_results = json.load(fixture_file)

    validation_results = expectationSuiteValidationResultSchema.load(raw_results)
    grouped_evrs = Renderer()._group_evrs_by_column(validation_results)
    return grouped_evrs["Name"]
    def _collect_rendered_document_content_sections(
        self,
        validation_results: ExpectationSuiteValidationResult,
        overview_content_blocks: List[RenderedComponentContent],
        collapse_content_blocks: List[RenderedTableContent],
        columns: Dict[str, list],
    ) -> List[RenderedSectionContent]:
        """Assemble the page sections in display order.

        The order is: "Overview", the optional "Table-Level Expectations"
        section, one section per column, and, when ``self.run_info_at_end``
        is truthy, a trailing "Run Info" section.

        Args:
            validation_results: Source of the column list and of the
                evaluation parameters passed to the column section renderer.
            overview_content_blocks: Content blocks for the "Overview" section.
            collapse_content_blocks: Content blocks for the "Run Info" section.
            columns: Results keyed by column name; may also contain the
                "Table-Level Expectations" key. Every column reported by
                ``Renderer._get_column_list_from_evrs`` must be present.

        Returns:
            The list of rendered sections.
        """
        table_level_key = "Table-Level Expectations"
        column_names = Renderer._get_column_list_from_evrs(validation_results)

        rendered_sections = [
            RenderedSectionContent(
                section_name="Overview",
                content_blocks=overview_content_blocks,
            )
        ]

        # Table-level results, when present, come right after the overview.
        if table_level_key in columns:
            rendered_sections.append(
                self._column_section_renderer.render(
                    validation_results=columns[table_level_key],
                    evaluation_parameters=validation_results.evaluation_parameters,
                )
            )

        for column_name in column_names:
            rendered_sections.append(
                self._column_section_renderer.render(
                    validation_results=columns[column_name],
                    evaluation_parameters=validation_results.evaluation_parameters,
                )
            )

        if self.run_info_at_end:
            rendered_sections.append(
                RenderedSectionContent(
                    section_name="Run Info",
                    content_blocks=collapse_content_blocks,
                )
            )

        return rendered_sections
    def render(self, validation_results):
        """Render profiling validation results into a ``RenderedDocumentContent`` page.

        Args:
            validation_results: Object whose ``meta`` mapping holds at least
                "run_id" and "expectation_suite_name", and optionally
                "batch_kwargs" or "batch_spec". ``run_id`` may be a string,
                a dict with "run_name"/"run_time" keys, or a ``RunIdentifier``.

        Returns:
            A ``RenderedDocumentContent`` with an "Overview" section followed
            by one section per column.

        Raises:
            KeyError: If "run_id" or "expectation_suite_name" is missing from
                ``validation_results.meta``.
        """
        run_id = validation_results.meta["run_id"]
        if isinstance(run_id, str):
            try:
                run_time = parse(run_id).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
            except (ValueError, TypeError):
                run_time = "__none__"
            run_name = run_id
        elif isinstance(run_id, dict):
            run_name = run_id.get("run_name") or "__none__"
            run_time = run_id.get("run_time") or "__none__"
        elif isinstance(run_id, RunIdentifier):
            run_name = run_id.run_name or "__none__"
            run_time = run_id.run_time.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        else:
            # Unsupported run_id (e.g. None): fall back to the placeholders
            # instead of hitting an UnboundLocalError further down.
            run_name = "__none__"
            run_time = "__none__"

        expectation_suite_name = validation_results.meta["expectation_suite_name"]
        batch_kwargs = validation_results.meta.get(
            "batch_kwargs", {}
        ) or validation_results.meta.get("batch_spec", {})

        # Add a datasource key to batch_kwargs if missing, but only when the suite
        # name follows the datasource.batch_kwargs_generator.data_asset_name.suite_name
        # pattern. Note that this writes into the mapping taken from meta.
        if "datasource" not in batch_kwargs:
            suite_name_parts = expectation_suite_name.split(".")
            if len(suite_name_parts) == 4:
                batch_kwargs["datasource"] = suite_name_parts[0]

        # Group EVRs by column
        # TODO: When we implement a ValidationResultSuite class, this method will move there.
        columns = self._group_evrs_by_column(validation_results)

        ordered_columns = Renderer._get_column_list_from_evrs(validation_results)
        column_types = self._overview_section_renderer._get_column_types(
            validation_results
        )

        data_asset_name = batch_kwargs.get("data_asset_name")

        # Determine whether we have a custom run_name, i.e. one that is not just
        # the run time written out as a string. TypeError is caught as well, to
        # match the handling of the string run_id above.
        try:
            run_name_as_time = parse(run_name)
        except (ValueError, TypeError):
            run_name_as_time = None
        try:
            run_time_datetime = parse(run_time)
        except (ValueError, TypeError):
            run_time_datetime = None

        # The placeholder check must look at run_name itself: run_name_as_time is
        # a datetime or None and can never equal the "__none__" string.
        include_run_name: bool = (
            run_name != "__none__" and run_name_as_time != run_time_datetime
        )

        page_title = f"Profiling Results / {str(expectation_suite_name)}"
        if data_asset_name:
            page_title += f" / {str(data_asset_name)}"
        if include_run_name:
            page_title += f" / {str(run_name)}"
        page_title += f" / {str(run_time)}"

        # Only the meta key that is actually present gets the batch definition.
        meta = validation_results.meta
        return RenderedDocumentContent(
            **{
                "renderer_type": "ProfilingResultsPageRenderer",
                "page_title": page_title,
                "expectation_suite_name": expectation_suite_name,
                "utm_medium": "profiling-results-page",
                "batch_kwargs": batch_kwargs if "batch_kwargs" in meta else None,
                "batch_spec": batch_kwargs if "batch_spec" in meta else None,
                "sections": [
                    self._overview_section_renderer.render(
                        validation_results, section_name="Overview"
                    )
                ]
                + [
                    self._column_section_renderer.render(
                        columns[column],
                        section_name=column,
                        column_type=column_types.get(column),
                    )
                    for column in ordered_columns
                ],
            }
        )
示例#13
0
def test_render():
    """Check that the base ``Renderer.render`` hands back a value equal to its input."""
    # ??? Should this really return the input object?
    # Seems like raising NotImplementedError might be preferable.
    for render_input in ({}, "wowza"):
        assert Renderer().render(render_input) == render_input