def test_test_column_aggregate_expectation_function():
    asset = ge.dataset.PandasDataset({
        'x': [1, 3, 5, 7, 9],
        'y': [1, 2, None, 7, 9],
    })

    def expect_second_value_to_be(self, column, value,
                                  result_format=None, include_config=True, catch_exceptions=None, meta=None):
        return {
            "success": self[column].iloc[1] == value,
            "result": {
                "observed_value": self[column].iloc[1],
            }
        }

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, 'x', 2, include_config=False
    ) == ExpectationValidationResult(
        result={'observed_value': 3, 'element_count': 5, 'missing_count': 0, 'missing_percent': 0.0},
        success=False
    )

    assert asset.test_column_aggregate_expectation_function(expect_second_value_to_be, column='x', value=3, include_config=False) == \
        ExpectationValidationResult(
            result={'observed_value': 3.0, 'element_count': 5, 'missing_count': 0, 'missing_percent': 0.0},
            success=True
        )

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, 'y', value=3, result_format="BOOLEAN_ONLY", include_config=False) == \
        ExpectationValidationResult(success=False)

    assert asset.test_column_aggregate_expectation_function(
        expect_second_value_to_be, 'y', 2, result_format="BOOLEAN_ONLY", include_config=False) == ExpectationValidationResult(success=True)
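
    # A reading of the asserts above: the aggregate harness merges
    # element_count / missing_count / missing_percent into the raw "result"
    # dict returned by the expectation function, and result_format
    # "BOOLEAN_ONLY" drops the result payload entirely.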

def test_stats_mixed_expectations():
    expectation_results = [
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(2, 1, 1, 50.0, False)
    assert expected == actual
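
# For reference, a minimal sketch of the statistic computation these tests
# exercise, assuming ValidationStatistics is a tuple-like record whose fields
# are, in order: evaluated_expectations, successful_expectations,
# unsuccessful_expectations, success_percent, success (field names are
# illustrative; only the positional values are confirmed by the asserts):
def _calc_validation_statistics_sketch(results):
    evaluated = len(results)
    successful = sum(1 for r in results if r.success)
    success_percent = successful / evaluated * 100 if evaluated else None
    return ValidationStatistics(
        evaluated,
        successful,
        evaluated - successful,
        success_percent,
        successful == evaluated,  # overall success only if nothing failed
    )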

def test_test_expectation_function():
    asset = ge.dataset.PandasDataset({"x": [1, 3, 5, 7, 9], "y": [1, 2, None, 7, 9],})
    asset_2 = ge.dataset.PandasDataset({"x": [1, 3, 5, 6, 9], "y": [1, 2, None, 6, 9],})

    def expect_dataframe_to_contain_7(self):
        return {"success": bool((self == 7).sum().sum() > 0)}

    assert asset.test_expectation_function(
        expect_dataframe_to_contain_7, include_config=False
    ) == ExpectationValidationResult(success=True)
    assert asset_2.test_expectation_function(
        expect_dataframe_to_contain_7, include_config=False
    ) == ExpectationValidationResult(success=False)

def test_stats_no_successful_expectations():
    expectation_results = [ExpectationValidationResult(success=False)]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(1, 0, 1, 0., False)
    assert expected == actual

    expectation_results = [
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=False),
        ExpectationValidationResult(success=False)
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(3, 0, 3, 0., False)
    assert expected == actual

def test_stats_all_successful_expectations():
    expectation_results = [
        ExpectationValidationResult(success=True),
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(1, 1, 0, 100.0, True)
    assert expected == actual

    expectation_results = [
        ExpectationValidationResult(success=True),
        ExpectationValidationResult(success=True),
        ExpectationValidationResult(success=True)
    ]
    actual = _calc_validation_statistics(expectation_results)
    expected = ValidationStatistics(3, 3, 0, 100.0, True)
    assert expected == actual

def test_StoreMetricsAction_column_metric(basic_in_memory_data_context_for_validation_operator):
    action = StoreMetricsAction(
        data_context=basic_in_memory_data_context_for_validation_operator,
        requested_metrics={
            "*": [
                {
                    "column": {
                        "provider_id": ["expect_column_values_to_be_unique.result.unexpected_count"]
                    }
                },
                "statistics.evaluated_expectations",
                "statistics.successful_expectations"
            ]
        },
        target_store_name="metrics_store"
    )

    validation_result = ExpectationSuiteValidationResult(
        success=False,
        meta={
            "expectation_suite_name": "foo",
            "run_id": "bar"
        },
        results=[
            ExpectationValidationResult(
                meta={},
                result={
                    "element_count": 10,
                    "missing_count": 0,
                    "missing_percent": 0.0,
                    "unexpected_count": 7,
                    "unexpected_percent": 0.0,
                    "unexpected_percent_nonmissing": 0.0,
                    "partial_unexpected_list": []
                },
                success=True,
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_unique",
                    kwargs={
                        "column": "provider_id",
                        "result_format": "BASIC"
                    }
                ),
                exception_info=None
            )
        ],
        statistics={
            "evaluated_expectations": 5,
            "successful_expectations": 3
        }
    )

    action.run(validation_result, ValidationResultIdentifier.from_object(validation_result), data_asset=None)

    assert basic_in_memory_data_context_for_validation_operator.stores["metrics_store"].get(ValidationMetricIdentifier(
        run_id="bar",
        expectation_suite_identifier=ExpectationSuiteIdentifier("foo"),
        metric_name="expect_column_values_to_be_unique.result.unexpected_count",
        metric_kwargs_id="column=provider_id"
    )) == 7
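
    # metric_kwargs_id "column=provider_id" mirrors the {"column": {"provider_id":
    # [...]}} nesting in requested_metrics above; the statistics.* metrics are
    # table-level and carry no column qualifier (a reading of this test, not of
    # the store's documented key format).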

def test_ProfilingResultsOverviewSectionRenderer_empty_type_list():
    # This rather specific test is a reaction to the error documented in #679
    validation = ExpectationSuiteValidationResult(
        results=[
            ExpectationValidationResult(
                success=True,
                result={
                    'observed_value': "VARIANT",  # Note this is NOT a recognized type by many backends
                },
                exception_info={
                    'raised_exception': False, 'exception_message': None, 'exception_traceback': None
                },
                expectation_config=ExpectationConfiguration(
                    expectation_type='expect_column_values_to_be_in_type_list',
                    kwargs={
                        'column': 'live', 'type_list': None, 'result_format': 'SUMMARY'
                    },
                    meta={'BasicDatasetProfiler': {'confidence': 'very low'}}
                )
            )
        ]
    )

    result = ProfilingResultsOverviewSectionRenderer().render(validation)

    # Find the variable types content block:
    types_table = [
        block.table for block in result.content_blocks
        if block.content_block_type == "table" and block.header.string_template["template"] == "Variable types"
    ][0]
    assert ["unknown", "1"] in types_table

def test_test_column_map_expectation_function():
    asset = ge.dataset.PandasDataset({
        "x": [1, 3, 5, 7, 9],
        "y": [1, 2, None, 7, 9],
    })

    def is_odd(
        self,
        column,
        mostly=None,
        result_format=None,
        include_config=True,
        catch_exceptions=None,
        meta=None,
    ):
        return column % 2 == 1

    assert asset.test_column_map_expectation_function(
        is_odd, column="x",
        include_config=False) == ExpectationValidationResult(
            result={
                "element_count": 5,
                "missing_count": 0,
                "missing_percent": 0,
                "unexpected_percent": 0.0,
                "partial_unexpected_list": [],
                "unexpected_percent_nonmissing": 0.0,
                "unexpected_count": 0,
            },
            success=True,
        )

    assert asset.test_column_map_expectation_function(
        is_odd, "x", result_format="BOOLEAN_ONLY",
        include_config=False) == ExpectationValidationResult(success=True)

    assert asset.test_column_map_expectation_function(
        is_odd, column="y", result_format="BOOLEAN_ONLY",
        include_config=False) == ExpectationValidationResult(success=False)

    assert asset.test_column_map_expectation_function(
        is_odd,
        column="y",
        result_format="BOOLEAN_ONLY",
        mostly=0.7,
        include_config=False,
    ) == ExpectationValidationResult(success=True)
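
    # Why mostly=0.7 flips column "y" to success: of its four non-missing
    # values [1, 2, 7, 9] only 2 is even, so 75% satisfy is_odd, clearing the
    # 70% threshold (arithmetic over the fixture data above):
    nonmissing = [1, 2, 7, 9]  # None is excluded from the denominator
    odd = [v for v in nonmissing if v % 2 == 1]
    assert len(odd) / len(nonmissing) == 0.75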

def test_ValidationResultsColumnSectionRenderer_render_header_evr_with_unescaped_dollar_sign(
        titanic_profiled_name_column_evrs):
    evr_with_unescaped_dollar_sign = ExpectationValidationResult(
        success=True,
        result={
            'element_count': 1313,
            'missing_count': 0,
            'missing_percent': 0.0,
            'unexpected_count': 0,
            'unexpected_percent': 0.0,
            'unexpected_percent_nonmissing': 0.0,
            'partial_unexpected_list': [],
            'partial_unexpected_index_list': [],
            'partial_unexpected_counts': []
        },
        exception_info={
            'raised_exception': False,
            'exception_message': None,
            'exception_traceback': None
        },
        expectation_config=ExpectationConfiguration(
            expectation_type='expect_column_values_to_be_in_type_list',
            kwargs={
                'column':
                'Name ($)',
                'type_list':
                ['CHAR', 'StringType', 'TEXT', 'VARCHAR', 'str', 'string'],
                'result_format':
                'SUMMARY'
            }))

    remaining_evrs, content_block = ValidationResultsColumnSectionRenderer._render_header(
        validation_results=[evr_with_unescaped_dollar_sign], )

    print(content_block.to_json_dict())

    assert content_block.to_json_dict() == {
        'content_block_type': 'header',
        'styling': {
            'classes': ['col-12', 'p-0'],
            'header': {
                'classes': ['alert', 'alert-secondary']
            }
        },
        'header': {
            'content_block_type': 'string_template',
            'string_template': {
                'template': 'Name ($$)',
                'tag': 'h5',
                'styling': {
                    'classes': ['m-0']
                }
            }
        }
    }

def test__find_evr_by_type(titanic_profiled_evrs_1):
    # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite, not ValidationResultSuite.results
    found_evr = Renderer()._find_evr_by_type(titanic_profiled_evrs_1.results,
                                             "expect_column_to_exist")
    print(found_evr)
    assert found_evr is None

    # TODO: _find_all_evrs_by_type should accept a ValidationResultSuite, not ValidationResultSuite.results
    found_evr = Renderer()._find_evr_by_type(
        titanic_profiled_evrs_1.results,
        "expect_column_distinct_values_to_be_in_set")
    print(found_evr)
    assert found_evr == ExpectationValidationResult(
        success=True,
        result={
            "observed_value": ["*", "1st", "2nd", "3rd"],
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "details": {
                "value_counts": [
                    {
                        "value": "*",
                        "count": 1
                    },
                    {
                        "value": "1st",
                        "count": 322
                    },
                    {
                        "value": "2nd",
                        "count": 279
                    },
                    {
                        "value": "3rd",
                        "count": 711
                    },
                ]
            },
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_distinct_values_to_be_in_set",
            kwargs={
                "column": "PClass",
                "value_set": None,
                "result_format": "SUMMARY"
            },
        ),
    )

def test_ProfilingResultsColumnSectionRenderer_render_header_with_unescaped_dollar_sign(titanic_profiled_name_column_evrs):
    evr_with_unescaped_dollar_sign = ExpectationValidationResult(
        success=True,
        result={
            "observed_value": "float64"
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_type_list",
            kwargs={
                "column": "Car Insurance Premiums ($)",
                "type_list": [
                    "DOUBLE_PRECISION",
                    "DoubleType",
                    "FLOAT",
                    "FLOAT4",
                    "FLOAT8",
                    "FloatType",
                    "NUMERIC",
                    "float"
                ],
                "result_format": "SUMMARY"
            },
            meta={
                "BasicDatasetProfiler": {
                    "confidence": "very low"
                }
            }
        )
    )

    content_block = ProfilingResultsColumnSectionRenderer._render_header(
        [evr_with_unescaped_dollar_sign],
        column_type=[],
    ).to_json_dict()
    print(content_block)
    assert content_block == {
        'content_block_type': 'header',
        'styling': {
            'classes': ['col-12', 'p-0'],
            'header': {'classes': ['alert', 'alert-secondary']}
        },
        'header': {
            'content_block_type': 'string_template',
            'string_template': {
                'template': 'Car Insurance Premiums ($$)',
                'tooltip': {'content': 'expect_column_to_exist', 'placement': 'top'},
                'tag': 'h5',
                'styling': {'classes': ['m-0', 'p-0']}
            }
        },
        'subheader': {
            'content_block_type': 'string_template',
            'string_template': {
                'template': 'Type: []',
                'tooltip': {'content': 'expect_column_values_to_be_of_type <br>expect_column_values_to_be_in_type_list'},
                'tag': 'h6',
                'styling': {'classes': ['mt-1', 'mb-0']}
            }
        }
    }

def test_expectation_decorator_meta():
    metadata = {'meta_key': 'meta_value'}
    eds = ExpectationOnlyDataAsset()
    out = eds.no_op_value_expectation('a', meta=metadata)
    config = eds.get_expectation_suite()

    assert ExpectationValidationResult(
        success=True, meta=metadata,
        expectation_config=config.expectations[0]) == out

    assert ExpectationConfiguration(expectation_type='no_op_value_expectation',
                                    kwargs={'value': 'a'},
                                    meta=metadata) == config.expectations[0]

def test_ValidationResultsTableContentBlockRenderer_generate_expectation_row_happy_path():
    evr = ExpectationValidationResult(
        success=True,
        result={
            'observed_value': True,
            'element_count': 162, 'missing_count': 153, 'missing_percent': 94.44444444444444
        },
        exception_info={
            'raised_exception': False, 'exception_message': None, 'exception_traceback': None
        },
        expectation_config=ExpectationConfiguration(
            expectation_type='expect_column_min_to_be_between',
            kwargs={
                'column': 'live', 'min_value': None, 'max_value': None, 'result_format': 'SUMMARY'
            },
            meta={'BasicDatasetProfiler': {'confidence': 'very low'}}
        )
    )
    result = ValidationResultsTableContentBlockRenderer.render([evr]).to_json_dict()
    print(result)

    # Note: A better approach to testing would separate out styling into a separate test.
    assert result == {
        'content_block_type': 'table',
        'styling': {
            'body': {'classes': ['table']},
            'classes': ['ml-2', 'mr-2', 'mt-0', 'mb-0', 'table-responsive',
                        'hide-succeeded-validations-column-section-target-child']
        },
        'table': [[
            {
                'content_block_type': 'string_template',
                'styling': {'parent': {'classes': ['hide-succeeded-validation-target-child']}},
                'string_template': {
                    'template': '$icon',
                    'params': {'icon': ''},
                    'styling': {
                        'params': {
                            'icon': {'classes': ['fas', 'fa-check-circle', 'text-success'], 'tag': 'i'}
                        }
                    }
                }
            },
            {
                'content_block_type': 'string_template',
                'string_template': {
                    'template': '$column minimum value may have any numerical value.',
                    'params': {
                        'column': 'live',
                        'min_value': None,
                        'max_value': None,
                        'result_format': 'SUMMARY',
                        'parse_strings_as_datetimes': None
                    },
                    'styling': {
                        'default': {'classes': ['badge', 'badge-secondary']},
                        'params': {'column': {'classes': ['badge', 'badge-primary']}}
                    }
                }
            },
            'True'
        ]],
        'header_row': ['Status', 'Expectation', 'Observed Value']
    }

def expected_evr_without_unexpected_rows():
    return ExpectationValidationResult(
        success=False,
        expectation_config={
            "expectation_type": "expect_column_values_to_be_in_set",
            "kwargs": {
                "column": "a",
                "value_set": [1, 5, 22],
            },
            "meta": {},
        },
        result={
            "element_count": 6,
            "unexpected_count": 2,
            "unexpected_index_list": [3, 5],
            "unexpected_percent": 33.33333333333333,
            "partial_unexpected_list": [3, 10],
            "unexpected_list": [3, 10],
            "partial_unexpected_index_list": [3, 5],
            "partial_unexpected_counts": [
                {"value": 3, "count": 1},
                {"value": 10, "count": 1},
            ],
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent_total": 33.33333333333333,
            "unexpected_percent_nonmissing": 33.33333333333333,
        },
        exception_info={
            "raised_exception": False,
            "exception_traceback": None,
            "exception_message": None,
        },
        meta={},
    )

    def _get_diagnostic_rendered_content(
        update_dict: Dict[str, Union[dict, ExpectationConfiguration]],
    ) -> RenderedAtomicContent:
        # Overwrite any fields passed in from the test and instantiate ExpectationValidationResult
        evr_kwargs.update(update_dict)
        evr = ExpectationValidationResult(**evr_kwargs)
        expectation_config = evr_kwargs["expectation_config"]
        expectation_type = expectation_config["expectation_type"]

        # Programmatically determine the renderer implementation
        renderer_impl = get_renderer_impl(
            object_name=expectation_type,
            renderer_type="atomic.diagnostic.observed_value",
        )[1]

        # Determine RenderedAtomicContent output
        source_obj = {"result": evr}
        res = renderer_impl(**source_obj)
        return res
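
    # Hypothetical invocation, assuming the enclosing test builds evr_kwargs
    # with an "expectation_config" whose expectation type registers an
    # atomic.diagnostic.observed_value renderer:
    #
    #     content = _get_diagnostic_rendered_content(
    #         {"result": {"observed_value": 3}}
    #     )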

def test_column_map_expectation_decorator():

    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):
        @MetaPandasDataset.column_map_expectation
        def expect_column_values_to_be_odd(self, column):
            return column.map(lambda x: x % 2)

        @MetaPandasDataset.column_map_expectation
        def expectation_that_crashes_on_sixes(self, column):
            return column.map(lambda x: (x - 6) / 0 != "duck")

    df = CustomPandasDataset({
        'all_odd': [1, 3, 5, 5, 5, 7, 9, 9, 9, 11],
        'mostly_odd': [1, 3, 5, 7, 9, 2, 4, 1, 3, 5],
        'all_even': [2, 4, 4, 6, 6, 6, 8, 8, 8, 8],
        'odd_missing': [1, 3, 5, None, None, None, None, 1, 3, None],
        'mixed_missing': [1, 3, 5, None, None, 2, 4, 1, 3, None],
        'all_missing': [None, None, None, None, None, None, None, None, None, None]
    })
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_values_to_be_odd("all_odd") == ExpectationValidationResult(
        result={
            'element_count': 10,
            'missing_count': 0,
            'missing_percent': 0.0,
            'partial_unexpected_counts': [],
            'partial_unexpected_index_list': [],
            'partial_unexpected_list': [],
            'unexpected_count': 0,
            'unexpected_index_list': [],
            'unexpected_list': [],
            'unexpected_percent': 0.0,
            'unexpected_percent_nonmissing': 0.0
        },
        success=True)

    assert df.expect_column_values_to_be_odd("all_missing") == ExpectationValidationResult(
        result={
            'element_count': 10,
            'missing_count': 10,
            'missing_percent': 100.0,
            'partial_unexpected_counts': [],
            'partial_unexpected_index_list': [],
            'partial_unexpected_list': [],
            'unexpected_count': 0,
            'unexpected_index_list': [],
            'unexpected_list': [],
            'unexpected_percent': 0.0,
            'unexpected_percent_nonmissing': None
        },
        success=True)

    assert df.expect_column_values_to_be_odd("odd_missing") == ExpectationValidationResult(
        result={
            'element_count': 10,
            'missing_count': 5,
            'missing_percent': 50.0,
            'partial_unexpected_counts': [],
            'partial_unexpected_index_list': [],
            'partial_unexpected_list': [],
            'unexpected_count': 0,
            'unexpected_index_list': [],
            'unexpected_list': [],
            'unexpected_percent': 0.0,
            'unexpected_percent_nonmissing': 0.0
        },
        success=True)

    assert df.expect_column_values_to_be_odd("mixed_missing") == ExpectationValidationResult(
        result={
            'element_count': 10,
            'missing_count': 3,
            'missing_percent': 30.0,
            'partial_unexpected_counts': [
                {'value': 2., 'count': 1},
                {'value': 4., 'count': 1}
            ],
            'partial_unexpected_index_list': [5, 6],
            'partial_unexpected_list': [2., 4.],
            'unexpected_count': 2,
            'unexpected_index_list': [5, 6],
            'unexpected_list': [2, 4],
            'unexpected_percent': 20.0,
            'unexpected_percent_nonmissing': (2 / 7 * 100)
        },
        success=False)

    assert df.expect_column_values_to_be_odd("mostly_odd") == ExpectationValidationResult(
        result={
            'element_count': 10,
            'missing_count': 0,
            'missing_percent': 0,
            'partial_unexpected_counts': [
                {'value': 2., 'count': 1},
                {'value': 4., 'count': 1}
            ],
            'partial_unexpected_index_list': [5, 6],
            'partial_unexpected_list': [2., 4.],
            'unexpected_count': 2,
            'unexpected_index_list': [5, 6],
            'unexpected_list': [2, 4],
            'unexpected_percent': 20.0,
            'unexpected_percent_nonmissing': 20.0
        },
        success=False)

    assert df.expect_column_values_to_be_odd("mostly_odd", mostly=.6) == ExpectationValidationResult(
        result={
            'element_count': 10,
            'missing_count': 0,
            'missing_percent': 0,
            'partial_unexpected_counts': [
                {'value': 2., 'count': 1},
                {'value': 4., 'count': 1}
            ],
            'partial_unexpected_index_list': [5, 6],
            'partial_unexpected_list': [2., 4.],
            'unexpected_count': 2,
            'unexpected_index_list': [5, 6],
            'unexpected_list': [2, 4],
            'unexpected_percent': 20.0,
            'unexpected_percent_nonmissing': 20.0
        },
        success=True)

    assert df.expect_column_values_to_be_odd("mostly_odd", result_format="BOOLEAN_ONLY") == \
           ExpectationValidationResult(success=False)

    df.default_expectation_args["result_format"] = "BOOLEAN_ONLY"

    assert df.expect_column_values_to_be_odd(
        "mostly_odd") == ExpectationValidationResult(success=False)

    df.default_expectation_args["result_format"] = "BASIC"

    assert df.expect_column_values_to_be_odd(
        "mostly_odd", include_config=True) == ExpectationValidationResult(
            expectation_config=ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_odd",
                kwargs={
                    "column": "mostly_odd",
                    "result_format": "BASIC"
                }),
            result={
                'element_count': 10,
                'missing_count': 0,
                'missing_percent': 0,
                'partial_unexpected_list': [2, 4],
                'unexpected_count': 2,
                'unexpected_percent': 20.0,
                'unexpected_percent_nonmissing': 20.0
            },
            success=False)
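
    # Sanity arithmetic for the "mixed_missing" case above: 10 elements, 3
    # missing, leaving 7 non-missing; two of those (2.0 and 4.0) are even, so
    # unexpected_percent is 2/10 * 100 == 20.0 while
    # unexpected_percent_nonmissing is 2/7 * 100, exactly the expressions used
    # in the expected result.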

def test_evaluation_parameter_store_methods(data_context):
    run_id = "20191125T000000.000000Z"
    source_patient_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_patient_data.default",
            "run_id": run_id
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_table_row_count_to_equal",
                    kwargs={
                        "value": 1024,
                    }),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "observed_value": 1024,
                    "element_count": 1024,
                    "missing_percent": 0.0,
                    "missing_count": 0
                })
        ],
        success=True)

    data_context.store_evaluation_parameters(source_patient_data_results)

    bound_parameters = data_context.evaluation_parameter_store.get_bind_params(
        run_id)
    assert bound_parameters == {
        'urn:great_expectations:validations:source_patient_data.default:'
        'expect_table_row_count_to_equal.result.observed_value': 1024
    }
    source_diabetes_data_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "source_diabetes_data.default",
            "run_id": run_id
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_unique_value_count_to_be_between",
                    kwargs={
                        "column": "patient_nbr",
                        "min": 2048,
                        "max": 2048
                    }),
                success=True,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "observed_value": 2048,
                    "element_count": 5000,
                    "missing_percent": 0.0,
                    "missing_count": 0
                })
        ],
        success=True)

    data_context.store_evaluation_parameters(source_diabetes_data_results)
    bound_parameters = data_context.evaluation_parameter_store.get_bind_params(
        run_id)
    assert bound_parameters == {
        'urn:great_expectations:validations:source_patient_data.default:'
        'expect_table_row_count_to_equal.result.observed_value': 1024,
        'urn:great_expectations:validations:source_diabetes_data.default:'
        'expect_column_unique_value_count_to_be_between.result.observed_value:'
        'column=patient_nbr': 2048
    }
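
    # The bind-param keys follow a URN pattern visible in the asserts above
    # (inferred from these two cases, not from a documented spec): a trailing
    # ":column=<name>" qualifier appears only for column-scoped expectations.
    column_urn = (
        "urn:great_expectations:validations:source_diabetes_data.default"
        ":expect_column_unique_value_count_to_be_between.result.observed_value"
        ":column=patient_nbr"
    )
    assert bound_parameters[column_urn] == 2048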

def test_exception_list_content_block_renderer():
    # We should grab the exception message and add default formatting
    result = ExceptionListContentBlockRenderer.render([
        ExpectationValidationResult(
            success=False,
            exception_info={
                "raised_exception": True,
                "exception_message": "Invalid partition object.",
                "exception_traceback": "Traceback (most recent call last):\n  File \"/home/user/great_expectations/great_expectations/data_asset/data_asset.py\", line 186, in wrapper\n    return_obj = func(self, **evaluation_args)\n  File \" /home/user/great_expectations/great_expectations/dataset/dataset.py\", line 106, in inner_wrapper\n    evaluation_result = func(self, column, *args, **kwargs)\n  File \"/home/user/great_expectations/great_expectations/dataset/dataset.py\", line 3388, in expect_column_kl_divergence_to_be_less_than\n    raise ValueError(\"Invalid partition object.\")\nValueError: Invalid partition object.\n"
            },
            expectation_config=ExpectationConfiguration(
                expectation_type="expect_column_kl_divergence_to_be_less_than",
                kwargs={
                    "column": "answer",
                    "partition_object": None,
                    "threshold": None,
                    "result_format": "SUMMARY"
                },
                meta={"BasicDatasetProfiler": {
                    "confidence": "very low"
                }}))
    ])

    assert result.to_json_dict() == {
        "content_block_type": "bullet_list",
        "bullet_list": [{
            "content_block_type": "string_template",
            "string_template": {
                "template": "$column: $expectation_type raised an exception: $exception_message",
                "params": {
                    "column": "answer",
                    "expectation_type": "expect_column_kl_divergence_to_be_less_than",
                    "exception_message": "Invalid partition object."
                },
                "styling": {
                    "classes": ["list-group-item"],
                    "params": {
                        "column": {
                            "classes": ["badge", "badge-primary"]
                        },
                        "expectation_type": {
                            "classes": ["text-monospace"]
                        },
                        "exception_message": {
                            "classes": ["text-monospace"]
                        }
                    }
                }
            }
        }],
        "styling": {
            "classes": ["col-12"],
            "styles": {
                "margin-top": "20px"
            },
            "header": {
                "classes": ["collapsed"],
                "attributes": {
                    "data-toggle": "collapse",
                    "href": "#{{content_block_id}}-body",
                    "role": "button",
                    "aria-expanded": "true",
                    "aria-controls": "collapseExample"
                },
                "styles": {
                    "cursor": "pointer"
                }
            },
            "body": {
                "classes": ["list-group", "collapse"]
            }
        },
        "header":
        "Failed expectations <span class=\"mr-3 triangle\"></span>"
    }

def test_ValidationResultsTableContentBlockRenderer_get_unexpected_table(
        evr_success):
    evr_failed_no_result = ExpectationValidationResult(
        success=False,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )

    evr_failed_no_unexpected_list_or_counts = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )

    evr_failed_partial_unexpected_list = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
            "partial_unexpected_list": [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )

    evr_failed_partial_unexpected_counts = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_count": 1313,
            "unexpected_percent": 100.0,
            "unexpected_percent_nonmissing": 100.0,
            "partial_unexpected_list": [
                1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
                11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
            ],
            "partial_unexpected_index_list": [
                0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
            ],
            "partial_unexpected_counts": [
                {"value": value, "count": 1} for value in range(1, 21)
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_in_set",
            kwargs={
                "column": "Unnamed: 0",
                "value_set": [],
                "result_format": "SUMMARY",
            },
        ),
    )

    # test for succeeded evr
    output_1 = ValidationResultsTableContentBlockRenderer._get_unexpected_table(
        evr_success)
    assert output_1 is None

    # test for failed evr with no "result" key
    output_2 = ValidationResultsTableContentBlockRenderer._get_unexpected_table(
        evr_failed_no_result)
    assert output_2 is None

    # test for failed evr with no unexpected list or unexpected counts
    output_3 = ValidationResultsTableContentBlockRenderer._get_unexpected_table(
        evr_failed_no_unexpected_list_or_counts)
    assert output_3 is None

    # test for failed evr with partial unexpected list
    output_4 = ValidationResultsTableContentBlockRenderer._get_unexpected_table(
        evr_failed_partial_unexpected_list)
    assert output_4.to_json_dict() == {
        "content_block_type": "table",
        "table": [[1], [2], [3], [4], [5], [6], [7], [8], [9], [10],
                  [11], [12], [13], [14], [15], [16], [17], [18], [19], [20]],
        "header_row": ["Sampled Unexpected Values"],
        "styling": {
            "body": {
                "classes": ["table-bordered", "table-sm", "mt-3"]
            }
        },
    }

    # test for failed evr with partial unexpected counts
    output_5 = ValidationResultsTableContentBlockRenderer._get_unexpected_table(
        evr_failed_partial_unexpected_counts)
    assert output_5.to_json_dict() == {
        "content_block_type": "table",
        "table": [[1], [2], [3], [4], [5], [6], [7], [8], [9], [10],
                  [11], [12], [13], [14], [15], [16], [17], [18], [19], [20]],
        "header_row": ["Sampled Unexpected Values"],
        "styling": {
            "body": {
                "classes": ["table-bordered", "table-sm", "mt-3"]
            }
        },
    }
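
    # Note that output_5 renders the same single-column table as output_4 even
    # though partial_unexpected_counts is populated, presumably because every
    # count is 1 and a value/count breakdown would add nothing.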

def test_ValidationResultsTableContentBlockRenderer_get_unexpected_statement(
        evr_success, evr_failed):
    evr_no_result = ExpectationValidationResult(
        success=True,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_table_row_count_to_be_between",
            kwargs={
                "min_value": 0,
                "max_value": None,
                "result_format": "SUMMARY"
            },
        ),
    )
    evr_failed_no_unexpected_count = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 1313,
            "missing_count": 0,
            "missing_percent": 0.0,
            "unexpected_percent": 0.2284843869002285,
            "unexpected_percent_nonmissing": 0.2284843869002285,
            "partial_unexpected_list": [
                "Daly, Mr Peter Denis ",
                "Barber, Ms ",
                "Geiger, Miss Emily ",
            ],
            "partial_unexpected_index_list": [77, 289, 303],
            "partial_unexpected_counts": [
                {"value": "Barber, Ms ", "count": 1},
                {"value": "Daly, Mr Peter Denis ", "count": 1},
                {"value": "Geiger, Miss Emily ", "count": 1},
            ],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_match_regex",
            kwargs={
                "column": "Name",
                "regex": "^\\s+|\\s+$",
                "result_format": "SUMMARY",
            },
        ),
    )

    # test for succeeded evr
    output_1 = ValidationResultsTableContentBlockRenderer._get_unexpected_statement(
        evr_success)
    assert output_1 == []

    # test for failed evr
    output_2 = ValidationResultsTableContentBlockRenderer._get_unexpected_statement(
        evr_failed)
    assert output_2 == [
        RenderedStringTemplateContent(
            **{
                "content_block_type": "string_template",
                "string_template": {
                    "template": "\n\n$unexpected_count unexpected values found. $unexpected_percent of $element_count total rows.",
                    "params": {
                        "unexpected_count": "3",
                        "unexpected_percent": "≈0.2285%",
                        "element_count": "1,313",
                    },
                    "tag": "strong",
                    "styling": {"classes": ["text-danger"]},
                },
            })
    ]

    # test for evr with no "result" key
    output_3 = ValidationResultsTableContentBlockRenderer._get_unexpected_statement(
        evr_no_result)
    print(json.dumps(output_3, indent=2))
    assert output_3 == []

    # test for evr with no unexpected count
    output_4 = ValidationResultsTableContentBlockRenderer._get_unexpected_statement(
        evr_failed_no_unexpected_count)
    print(output_4)
    assert output_4 == []

    # test for evr with exception
    evr_failed_exception = ExpectationValidationResult(
        success=False,
        exception_info={
            "raised_exception": True,
            "exception_message": "Unrecognized column: not_a_real_column",
            "exception_traceback": "Traceback (most recent call last):\n...more_traceback...",
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_match_regex",
            kwargs={
                "column": "Name",
                "regex": "^\\s+|\\s+$",
                "result_format": "SUMMARY",
            },
        ),
    )

    output_5 = ValidationResultsTableContentBlockRenderer._get_unexpected_statement(
        evr_failed_exception)
    output_5 = [content.to_json_dict() for content in output_5]
    expected_output_5 = [
        {
            "content_block_type": "string_template",
            "string_template": {
                "template": "\n\n$expectation_type raised an exception:\n$exception_message",
                "params": {
                    "expectation_type": "expect_column_values_to_not_match_regex",
                    "exception_message": "Unrecognized column: not_a_real_column",
                },
                "tag": "strong",
                "styling": {
                    "classes": ["text-danger"],
                    "params": {
                        "exception_message": {"tag": "code"},
                        "expectation_type": {
                            "classes": ["badge", "badge-danger", "mb-2"]
                        },
                    },
                },
            },
        },
        {
            "content_block_type": "collapse",
            "collapse_toggle_link": "Show exception traceback...",
            "collapse": [{
                "content_block_type": "string_template",
                "string_template": {
                    "template": "Traceback (most recent call last):\n...more_traceback...",
                    "tag": "code",
                },
            }],
            "inline_link": False,
        },
    ]
    assert output_5 == expected_output_5

                "action": {
                    "class_name": "OpenLineageValidationAction",
                    "module_name": "openlineage.common.provider.great_expectations.action"
                }
            }]
        }
    },
    anonymous_usage_statistics={'enabled': False}
)

TABLE_NAME = "test_data"

# Common validation results
table_result = ExpectationValidationResult(success=True,
                                           expectation_config=ExpectationConfiguration(
                                               expectation_type='expect_table_row_count_to_equal',
                                               kwargs={'value': 10}),
                                           result={"observed_value": 10})
column_result = ExpectationValidationResult(success=True,
                                            expectation_config=ExpectationConfiguration(
                                                expectation_type='expect_column_sum_to_be_between',
                                                kwargs={'column': 'size', 'min_value': 0,
                                                        'max_value': 100}
                                            ),
                                            result={'observed_value': 60})
result_suite = ExpectationSuiteValidationResult(success=True, meta={'batch_kwargs': {}},
                                                results=[table_result, column_result])


@pytest.fixture(scope='session')
def test_db_file():
    ...


def test_file_format_map_output():
    incomplete_file_path = file_relative_path(
        __file__, '../test_sets/toy_data_incomplete.csv')
    incomplete_file_dat = ge.data_asset.FileDataAsset(incomplete_file_path)
    null_file_path = file_relative_path(__file__, '../test_sets/null_file.csv')
    null_file_dat = ge.data_asset.FileDataAsset(null_file_path)
    white_space_path = file_relative_path(__file__,
                                          '../test_sets/white_space.txt')
    white_space_dat = ge.data_asset.FileDataAsset(white_space_path)

    # Boolean Expectation Output
    expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r',\S',
        expected_count=3,
        skip=1,
        result_format="BOOLEAN_ONLY",
        include_config=False)
    expected_result = ExpectationValidationResult(success=False)
    assert expected_result == expectation

    # Empty File Expectations
    expectation = null_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r',\S',
        expected_count=3,
        skip=1,
        result_format="BASIC",
        include_config=False)
    expected_result = ExpectationValidationResult(
        success=None,
        result={
            "element_count": 0,
            "missing_count": 0,
            "missing_percent": None,
            "unexpected_count": 0,
            "unexpected_percent": None,
            "unexpected_percent_nonmissing": None,
            "partial_unexpected_list": []
        })

    assert expected_result == expectation

    # White Space File
    expectation = white_space_dat.expect_file_line_regex_match_count_to_equal(
        regex=r',\S',
        expected_count=3,
        result_format="BASIC",
        include_config=False)
    expected_result = ExpectationValidationResult(
        success=None,
        result={
            "element_count": 11,
            "missing_count": 11,
            "missing_percent": 100.0,
            "unexpected_count": 0,
            "unexpected_percent": 0,
            "unexpected_percent_nonmissing": None,
            "partial_unexpected_list": []
        })

    assert expected_result == expectation

    # Complete Result Format
    expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
        regex=r',\S',
        expected_count=3,
        skip=1,
        result_format="COMPLETE",
        include_config=False)

    expected_result = ExpectationValidationResult(
        success=False,
        result={
            "element_count": 9,
            "missing_count": 2,
            "missing_percent": (2 / 9 * 100),
            "unexpected_count": 3,
            "unexpected_percent": (3 / 9 * 100),
            "unexpected_percent_nonmissing": (3 / 7 * 100),
            "partial_unexpected_list": ['A,C,1\n', 'B,1,4\n', 'A,1,4\n'],
            "partial_unexpected_counts": [
                {"value": 'A,1,4\n', "count": 1},
                {"value": 'A,C,1\n', "count": 1},
                {"value": 'B,1,4\n', "count": 1},
            ],
            "partial_unexpected_index_list": [0, 3, 5],
            "unexpected_list": ['A,C,1\n', 'B,1,4\n', 'A,1,4\n'],
            "unexpected_index_list": [0, 3, 5]
        })

    assert expected_result == expectation

    # Invalid Result Format
    with pytest.raises(ValueError):
        expectation = incomplete_file_dat.expect_file_line_regex_match_count_to_equal(
            regex=r',\S',
            expected_count=3,
            skip=1,
            result_format="JOKE",
            include_config=False)
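# Consistency sketch (added): the COMPLETE-format percentages asserted above
# are plain arithmetic over the three counts; unexpected_percent_nonmissing
# uses the non-missing lines (9 - 2 = 7) as its denominator.
def test_complete_format_percentage_arithmetic():
    element_count, missing_count, unexpected_count = 9, 2, 3
    assert missing_count / element_count * 100 == 2 / 9 * 100
    assert unexpected_count / element_count * 100 == 3 / 9 * 100
    assert unexpected_count / (element_count - missing_count) * 100 == 3 / 7 * 100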
def test_column_aggregate_expectation_decorator():

    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):
        @PandasDataset.column_aggregate_expectation
        def expect_column_median_to_be_odd(self, column):
            median = self.get_column_median(column)
            return {
                "success": median % 2,
                "result": {
                    "observed_value": median
                }
            }

    df = CustomPandasDataset({
        'all_odd': [1, 3, 5, 7, 9],
        'all_even': [2, 4, 6, 8, 10],
        'odd_missing': [1, 3, 5, None, None],
        'mixed_missing': [1, 2, None, None, 6],
        'mixed_missing_2': [1, 3, None, None, 6],
        'all_missing': [
            None,
            None,
            None,
            None,
            None,
        ],
    })
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_median_to_be_odd(
        "all_odd") == ExpectationValidationResult(
            result={
                'observed_value': 5,
                'element_count': 5,
                'missing_count': 0,
                'missing_percent': 0
            },
            success=True)

    assert df.expect_column_median_to_be_odd(
        "all_even") == ExpectationValidationResult(
            result={
                'observed_value': 6,
                'element_count': 5,
                'missing_count': 0,
                'missing_percent': 0
            },
            success=False)

    assert df.expect_column_median_to_be_odd(
        "all_even", result_format="SUMMARY") == ExpectationValidationResult(
            result={
                'observed_value': 6,
                'element_count': 5,
                'missing_count': 0,
                'missing_percent': 0
            },
            success=False)

    assert df.expect_column_median_to_be_odd(
        "all_even", result_format="BOOLEAN_ONLY") == ExpectationValidationResult(
            success=False)

    df.default_expectation_args["result_format"] = "BOOLEAN_ONLY"
    assert df.expect_column_median_to_be_odd(
        "all_even") == ExpectationValidationResult(success=False)

    assert df.expect_column_median_to_be_odd(
        "all_even", result_format="BASIC") == ExpectationValidationResult(
            result={
                'observed_value': 6,
                'element_count': 5,
                'missing_count': 0,
                'missing_percent': 0
            },
            success=False)
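
    # Note (added): the expected results above include element_count,
    # missing_count, and missing_percent even though the inner
    # expect_column_median_to_be_odd only returns "success" and
    # "observed_value"; the column_aggregate_expectation decorator
    # fills in those bookkeeping fields itself.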
def test_column_pair_map_expectation_decorator():

    # Create a new CustomPandasDataset to
    # (1) demonstrate that custom subclassing works, and
    # (2) test expectation business logic without dependencies on any other functions.
    class CustomPandasDataset(PandasDataset):
        @PandasDataset.column_pair_map_expectation
        def expect_column_pair_values_to_be_different(self,
                                                      column_A,
                                                      column_B,
                                                      keep_missing="either",
                                                      output_format=None,
                                                      include_config=True,
                                                      catch_exceptions=None):
            return column_A != column_B

    df = CustomPandasDataset({
        'all_odd': [1, 3, 5, 7, 9],
        'all_even': [2, 4, 6, 8, 10],
        'odd_missing': [1, 3, 5, None, None],
        'mixed_missing': [1, 2, None, None, 6],
        'mixed_missing_2': [1, 3, None, None, 6],
        'all_missing': [
            None,
            None,
            None,
            None,
            None,
        ],
    })
    df.set_default_expectation_argument("result_format", "COMPLETE")
    df.set_default_expectation_argument("include_config", False)

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even") == ExpectationValidationResult(
            success=True,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 0,
                "missing_percent": 0.0,
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
                "unexpected_list": [],
                "unexpected_index_list": [],
                "partial_unexpected_list": [],
                "partial_unexpected_index_list": [],
                "partial_unexpected_counts": [],
            })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd",
        "all_even",
        ignore_row_if="both_values_are_missing",
    ) == ExpectationValidationResult(success=True,
                                     result={
                                         "element_count": 5,
                                         "missing_count": 0,
                                         "unexpected_count": 0,
                                         "missing_percent": 0.0,
                                         "unexpected_percent": 0.0,
                                         "unexpected_percent_nonmissing": 0.0,
                                         "unexpected_list": [],
                                         "unexpected_index_list": [],
                                         "partial_unexpected_list": [],
                                         "partial_unexpected_index_list": [],
                                         "partial_unexpected_counts": [],
                                     })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "odd_missing") == ExpectationValidationResult(
            success=False,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 3,
                "missing_percent": 0.0,
                "unexpected_percent": 60.0,
                "unexpected_percent_nonmissing": 60.0,
                "unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
                "unexpected_index_list": [0, 1, 2],
                "partial_unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
                "partial_unexpected_index_list": [0, 1, 2],
                "partial_unexpected_counts": [
                    {'count': 1, 'value': (1, 1.)},
                    {'count': 1, 'value': (3, 3.)},
                    {'count': 1, 'value': (5, 5.)},
                ]
            })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "odd_missing", ignore_row_if="both_values_are_missing"
    ) == ExpectationValidationResult(
        success=False,
        result={
            "element_count": 5,
            "missing_count": 0,
            "unexpected_count": 3,
            "missing_percent": 0.0,
            "unexpected_percent": 60.0,
            "unexpected_percent_nonmissing": 60.0,
            "unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
            "unexpected_index_list": [0, 1, 2],
            "partial_unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
            "partial_unexpected_index_list": [0, 1, 2],
            "partial_unexpected_counts": [
                {'count': 1, 'value': (1, 1.)},
                {'count': 1, 'value': (3, 3.)},
                {'count': 1, 'value': (5, 5.)},
            ]
        })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "odd_missing", ignore_row_if="either_value_is_missing"
    ) == ExpectationValidationResult(
        success=False,
        result={
            "element_count": 5,
            "missing_count": 2,
            "unexpected_count": 3,
            "missing_percent": 40.0,
            "unexpected_percent": 60.0,
            "unexpected_percent_nonmissing": 100.0,
            "unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
            "unexpected_index_list": [0, 1, 2],
            "partial_unexpected_list": [(1, 1.), (3, 3.), (5, 5.)],
            "partial_unexpected_index_list": [0, 1, 2],
            "partial_unexpected_counts": [
                {'count': 1, 'value': (1, 1.)},
                {'count': 1, 'value': (3, 3.)},
                {'count': 1, 'value': (5, 5.)},
            ]
        })

    with pytest.raises(ValueError):
        df.expect_column_pair_values_to_be_different(
            "all_odd", "odd_missing", ignore_row_if="blahblahblah")

    # Test SUMMARY, BASIC, and BOOLEAN_ONLY result_format values
    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even",
        result_format="SUMMARY") == ExpectationValidationResult(
            success=True,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 0,
                "missing_percent": 0.0,
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
                "partial_unexpected_list": [],
                "partial_unexpected_index_list": [],
                "partial_unexpected_counts": [],
            })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even",
        result_format="BASIC") == ExpectationValidationResult(
            success=True,
            result={
                "element_count": 5,
                "missing_count": 0,
                "unexpected_count": 0,
                "missing_percent": 0.0,
                "unexpected_percent": 0.0,
                "unexpected_percent_nonmissing": 0.0,
                "partial_unexpected_list": [],
            })

    assert df.expect_column_pair_values_to_be_different(
        "all_odd", "all_even",
        result_format="BOOLEAN_ONLY") == ExpectationValidationResult(
            success=True)
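
    # Note (added): the ignore_row_if variants above differ only in the
    # missing-pair bookkeeping. With "either_value_is_missing", the two
    # (value, None) rows of odd_missing count as missing_count=2
    # (missing_percent=40.0), so the same 3 unexpected pairs yield
    # unexpected_percent_nonmissing=100.0 instead of 60.0.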
def test_ValidationResultsTableContentBlockRenderer_get_observed_value(
        evr_success):
    evr_no_result_key = ExpectationValidationResult(
        success=True,
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_table_row_count_to_be_between",
            kwargs={
                "min_value": 0,
                "max_value": None,
                "result_format": "SUMMARY"
            },
        ),
    )

    evr_expect_column_values_to_not_be_null = ExpectationValidationResult(
        success=True,
        result={
            "element_count": 1313,
            "unexpected_count": 1050,
            "unexpected_percent": 79.96953541508,
            "partial_unexpected_list": [],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_be_null",
            kwargs={
                "column": "Unnamed: 0",
                "mostly": 0.5,
                "result_format": "SUMMARY"
            },
        ),
    )

    evr_expect_column_values_to_be_null = ExpectationValidationResult(
        success=True,
        result={
            "element_count": 1313,
            "unexpected_count": 0,
            "unexpected_percent": 0.0,
            "partial_unexpected_list": [],
        },
        exception_info={
            "raised_exception": False,
            "exception_message": None,
            "exception_traceback": None,
        },
        expectation_config=ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_null",
            kwargs={
                "column": "Unnamed: 0",
                "mostly": 0.5,
                "result_format": "SUMMARY"
            },
        ),
    )

    # test _get_observed_value when evr.result["observed_value"] exists
    output_1 = ValidationResultsTableContentBlockRenderer._get_observed_value(
        evr_success)
    assert output_1 == "1,313"
    # test _get_observed_value when evr.result does not exist
    output_2 = ValidationResultsTableContentBlockRenderer._get_observed_value(
        evr_no_result_key)
    assert output_2 == "--"
    # test _get_observed_value for expect_column_values_to_not_be_null expectation type
    output_3 = ValidationResultsTableContentBlockRenderer._get_observed_value(
        evr_expect_column_values_to_not_be_null)
    assert output_3 == "≈20.03% not null"
    # test _get_observed_value for expect_column_values_to_be_null expectation type
    output_4 = ValidationResultsTableContentBlockRenderer._get_observed_value(
        evr_expect_column_values_to_be_null)
    assert output_4 == "100% null"
def test_evaluation_parameters_for_between_expectations_parse_correctly(
    titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled,
):
    context = titanic_pandas_data_context_with_v013_datasource_with_checkpoints_v1_with_empty_store_stats_enabled

    # Note that if you modify this batch request, you may save the new version as a .json file
    #  to pass in later via the --batch-request option
    df = pandas.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    batch_request = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_runtime_data_connector",
        "data_asset_name": "foo",
        "runtime_parameters": {
            "batch_data": df
        },
        "batch_identifiers": {
            "pipeline_stage_name": "kickoff",
            "airflow_run_id": "1234",
        },
    }

    # Feel free to change the name of your suite here. Renaming this will not remove the other one.
    expectation_suite_name = "abcde"
    try:
        suite = context.get_expectation_suite(
            expectation_suite_name=expectation_suite_name)
        print(
            f'Loaded ExpectationSuite "{suite.expectation_suite_name}" containing {len(suite.expectations)} '
            f"expectations.")
    except DataContextError:
        suite = context.create_expectation_suite(
            expectation_suite_name=expectation_suite_name)
        print(f'Created ExpectationSuite "{suite.expectation_suite_name}".')

    validator = context.get_validator(
        batch_request=RuntimeBatchRequest(**batch_request),
        expectation_suite_name=expectation_suite_name,
    )
    column_names = [f'"{column_name}"' for column_name in validator.columns()]
    print(f"Columns: {', '.join(column_names)}.")

    validator.set_evaluation_parameter("my_min", 1)
    validator.set_evaluation_parameter("my_max", 5)

    result = validator.expect_table_row_count_to_be_between(
        min_value={
            "$PARAMETER": "my_min",
            "$PARAMETER.upstream_row_count": 10
        },
        max_value={
            "$PARAMETER": "my_max",
            "$PARAMETER.upstream_row_count": 50
        },
    )

    assert result == ExpectationValidationResult(
        **{
            "expectation_config": {
                "meta": {
                    "substituted_parameters": {
                        "min_value": 1,
                        "max_value": 5
                    }
                },
                "kwargs": {
                    "min_value": 1,
                    "max_value": 5,
                    "batch_id": "15fe04adb6ff20b9fc6eda486b7a36b7",
                },
                "expectation_type": "expect_table_row_count_to_be_between",
                "ge_cloud_id": None,
            },
            "meta": {},
            "exception_info": {
                "raised_exception": False,
                "exception_traceback": None,
                "exception_message": None,
            },
            "success": True,
            "result": {
                "observed_value": 3
            },
        })
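
    # Note (added, hedged): the "$PARAMETER.upstream_row_count" entries appear
    # to carry stored fallback values for when the named parameter is not set;
    # here my_min and my_max were set on the validator, so the substituted
    # parameters in the result are 1 and 5 rather than 10 and 50.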
def test_validate():

    with open(
            file_relative_path(__file__,
                               "./test_sets/titanic_expectations.json")) as f:
        my_expectation_suite = expectationSuiteSchema.loads(f.read())

    with mock.patch("uuid.uuid1") as uuid:
        uuid.return_value = "1234"
        my_df = ge.read_csv(file_relative_path(__file__,
                                               "./test_sets/Titanic.csv"),
                            expectation_suite=my_expectation_suite)
    my_df.set_default_expectation_argument("result_format", "COMPLETE")

    with mock.patch("datetime.datetime") as mock_datetime:
        mock_datetime.utcnow.return_value = datetime(1955, 11, 5)
        results = my_df.validate(catch_exceptions=False)

    with open(
            file_relative_path(
                __file__,
                './test_sets/titanic_expected_data_asset_validate_results.json'
            )) as f:
        expected_results = expectationSuiteValidationResultSchema.loads(
            f.read())

    del results.meta["great_expectations.__version__"]

    assert expected_results == results

    # Now, change the results and ensure they are no longer equal
    results.results[0] = ExpectationValidationResult()
    assert expected_results != results

    # Finally, confirm that only_return_failures works
    # and does not affect the "statistics" field.
    with mock.patch("datetime.datetime") as mock_datetime:
        mock_datetime.utcnow.return_value = datetime(1955, 11, 5)
        validation_results = my_df.validate(only_return_failures=True)
        del validation_results.meta["great_expectations.__version__"]

    expected_results = ExpectationSuiteValidationResult(
        meta={
            "expectation_suite_name": "titanic",
            "run_id": "19551105T000000.000000Z",
            "batch_kwargs": {
                "ge_batch_id": "1234"
            },
            "batch_markers": {},
            "batch_parameters": {}
        },
        results=[
            ExpectationValidationResult(
                expectation_config=ExpectationConfiguration(
                    expectation_type="expect_column_values_to_be_in_set",
                    kwargs={
                        "column": "PClass",
                        "value_set": ["1st", "2nd", "3rd"]
                    }),
                success=False,
                exception_info={
                    "exception_message": None,
                    "exception_traceback": None,
                    "raised_exception": False
                },
                result={
                    "partial_unexpected_index_list": [456],
                    "unexpected_count": 1,
                    "unexpected_list": ["*"],
                    "unexpected_percent": 0.07616146230007616,
                    "element_count": 1313,
                    "missing_percent": 0.0,
                    "partial_unexpected_counts": [{
                        "count": 1,
                        "value": "*"
                    }],
                    "partial_unexpected_list": ["*"],
                    "unexpected_percent_nonmissing": 0.07616146230007616,
                    "missing_count": 0,
                    "unexpected_index_list": [456]
                })
        ],
        success=expected_results.success,  # unaffected
        statistics=expected_results["statistics"]  # unaffected
    )
    assert expected_results == validation_results
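# Format sketch (added): the expected run_id is the mocked datetime(1955, 11, 5)
# rendered as "%Y%m%dT%H%M%S.%fZ"; that exact format string is an assumption
# about GE's run-id convention, but the equality below holds regardless.
def test_expected_run_id_matches_mocked_datetime():
    assert datetime(1955, 11, 5).strftime("%Y%m%dT%H%M%S.%fZ") == "19551105T000000.000000Z"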