Exemplo n.º 1
0
def test_suite_notebook_renderer_render_user_configurable_profiler_configuration(
    mock_data_context: mock.MagicMock,
):
    """Check the notebook rendered when no profiler name is supplied.

    A ``SuiteProfileNotebookRenderer`` is built with an empty ``profiler_name``
    and its rendered notebook is searched, via ``find_code_in_notebook``, for
    the import, batch-request and ``UserConfigurableProfiler`` snippets below.
    The test fails on the first snippet that is not found.
    """
    expected_imports = """import datetime

import pandas as pd

import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.profile.user_configurable_profiler import (
    UserConfigurableProfiler,
)
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError"""

    expected_batch_request = """batch_request = {
    "datasource_name": "my_datasource",
    "data_connector_name": "my_basic_data_connector",
    "data_asset_name": "Titanic_1912",
}"""

    expected_profiler_usage = """profiler = UserConfigurableProfiler(
    profile_dataset=validator,
    excluded_expectations=None,
    ignored_columns=ignored_columns,
    not_null_only=False,
    primary_or_compound_key=False,
    semantic_types_dict=None,
    table_expectations_only=False,
    value_set_threshold="MANY",
)
suite = profiler.build_suite()"""

    batch_request = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_basic_data_connector",
        "data_asset_name": "Titanic_1912",
    }
    # An empty profiler name is what should signal that the
    # UserConfigurableProfiler is necessary.
    rendered_notebook = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="",
        batch_request=batch_request,
    ).render()

    expected_snippets = (
        expected_imports,
        expected_batch_request,
        expected_profiler_usage,
    )
    for expected in expected_snippets:
        was_found = find_code_in_notebook(rendered_notebook, expected)
        assert was_found, f"Could not find snippet in Notebook: {expected}"
def test_suite_notebook_renderer_render_rule_based_profiler_configuration(
    mock_data_context: mock.MagicMock,
):
    """Check the notebook rendered when a profiler name is supplied.

    A ``SuiteProfileNotebookRenderer`` is built with ``profiler_name`` set to
    ``"my_profiler"`` and its rendered notebook is searched, via
    ``find_code_in_notebook``, for the import, batch-request and
    ``run_profiler_with_dynamic_arguments`` snippets below. The test fails on
    the first snippet that is not found.
    """
    expected_imports = """\
import datetime

import pandas as pd

import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError
"""

    expected_batch_request = """\
batch_request = {
    "datasource_name": "my_datasource",
    "data_connector_name": "my_basic_data_connector",
    "data_asset_name": "Titanic_1912",
}
"""

    expected_profiler_usage = """\
result = context.run_profiler_with_dynamic_arguments(
    name="my_profiler",
    batch_request=batch_request,
)
validator.expectation_suite = result.get_expectation_suite(
    expectation_suite_name=expectation_suite_name
)"""

    batch_request = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_basic_data_connector",
        "data_asset_name": "Titanic_1912",
    }
    # A non-empty profiler name should signal that a rule-based profiler
    # from the context's profile store is necessary.
    rendered_notebook = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="my_profiler",
        batch_request=batch_request,
    ).render()

    expected_snippets = (
        expected_imports,
        expected_batch_request,
        expected_profiler_usage,
    )
    for expected in expected_snippets:
        was_found = find_code_in_notebook(rendered_notebook, expected)
        assert was_found, f"Could not find snippet in Notebook: {expected}"
def test_suite_notebook_renderer_render_rule_based_profiler_configuration(
    mock_data_context: mock.MagicMock,
):
    """Check the notebook rendered when a profiler name is supplied.

    A ``SuiteProfileNotebookRenderer`` is built with ``profiler_name`` set to
    ``"my_profiler"``; every entry of ``SNIPPETS_RULE_BASED_PROFILER`` must
    then be found in the rendered notebook by ``find_code_in_notebook``.

    NOTE(review): a function with this same name is defined earlier in this
    file; if both live in one module, this definition replaces the earlier
    one -- confirm that is intended.
    """
    batch_request = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_basic_data_connector",
        "data_asset_name": "Titanic_1912",
    }
    # A non-empty profiler name should signal that a rule-based profiler
    # from the context's profile store is necessary.
    rendered_notebook = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="my_profiler",
        batch_request=batch_request,
    ).render()

    for expected in SNIPPETS_RULE_BASED_PROFILER:
        was_found = find_code_in_notebook(rendered_notebook, expected)
        assert was_found, f"Could not find snippet in Notebook: {expected}"
def test_suite_notebook_renderer_render_onboarding_data_assistant_configuration(
    mock_data_context: mock.MagicMock,
):
    """Check the notebook rendered when no profiler name is supplied.

    A ``SuiteProfileNotebookRenderer`` is built with an empty ``profiler_name``;
    every entry of ``SNIPPETS_USER_CONFIGURABLE_PROFILER`` must then be found
    in the rendered notebook by ``find_code_in_notebook``.

    NOTE(review): the test name refers to the OnboardingDataAssistant, yet the
    expected snippets come from ``SNIPPETS_USER_CONFIGURABLE_PROFILER`` --
    confirm which of the two is intended.
    """
    batch_request = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_basic_data_connector",
        "data_asset_name": "Titanic_1912",
    }
    # An empty profiler name should signal that the OnboardingDataAssistant
    # is invoked.
    rendered_notebook = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="",
        batch_request=batch_request,
    ).render()

    for expected in SNIPPETS_USER_CONFIGURABLE_PROFILER:
        was_found = find_code_in_notebook(rendered_notebook, expected)
        assert was_found, f"Could not find snippet in Notebook: {expected}"
def test_suite_notebook_renderer_render_onboarding_data_assistant_configuration(
    mock_data_context: mock.MagicMock,
):
    """Check the notebook rendered when no profiler name is supplied.

    A ``SuiteProfileNotebookRenderer`` is built with an empty ``profiler_name``
    and its rendered notebook is searched, via ``find_code_in_notebook``, for
    the import, batch-request and ``context.assistants.onboarding.run``
    snippets below. The test fails on the first snippet that is not found.

    NOTE(review): a function with this same name is defined earlier in this
    file; if both live in one module, this definition replaces the earlier
    one -- confirm that is intended.
    """
    expected_imports = """\
import datetime

import pandas as pd

import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.rule_based_profiler.types.data_assistant_result import (
    DataAssistantResult,
)
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError"""

    expected_batch_request = """batch_request = {
    "datasource_name": "my_datasource",
    "data_connector_name": "my_basic_data_connector",
    "data_asset_name": "Titanic_1912",
}
"""

    # The text below must match the rendered notebook verbatim, including the
    # spelling used inside its embedded comments.
    expected_assistant_usage = """\
data_assistant_result: DataAssistantResult = context.assistants.onboarding.run(
    batch_request=batch_request,
    # include_column_names=include_column_names,
    exclude_column_names=exclude_column_names,
    # include_column_name_suffixes=include_column_name_suffixes,
    # exclude_column_name_suffixes=exclude_column_name_suffixes,
    # semantic_type_filter_module_name=semantic_type_filter_module_name,
    # semantic_type_filter_class_name=semantic_type_filter_class_name,
    # include_semantic_types=include_semantic_types,
    # exclude_semantic_types=exclude_semantic_types,
    # allowed_semantic_types_passthrough=allowed_semantic_types_passthrough,
    cardinality_limit_mode="rel_100",  # case-insenstive (see documentaiton for other options)
    # max_unique_values=max_unique_values,
    # max_proportion_unique=max_proportion_unique,
    # column_value_uniqueness_rule={
    #     "success_ratio": 0.8,
    # },
    # column_value_nullity_rule={
    # },
    # column_value_nonnullity_rule={
    # },
    # numeric_columns_rule={
    #     "false_positive_rate": 0.1,
    #     "random_seed": 43792,
    # },
    # datetime_columns_rule={
    #     "truncate_values": {
    #         "lower_bound": 0,
    #         "upper_bound": 4481049600,  # Friday, January 1, 2112 0:00:00
    #     },
    #     "round_decimals": 0,
    # },
    # text_columns_rule={
    #     "strict_min": True,
    #     "strict_max": True,
    #     "success_ratio": 0.8,
    # },
    # categorical_columns_rule={
    #     "false_positive_rate": 0.1,
    #     "round_decimals": 3,
    # },
)
validator.expectation_suite = data_assistant_result.get_expectation_suite(
    expectation_suite_name=expectation_suite_name
)"""

    batch_request = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_basic_data_connector",
        "data_asset_name": "Titanic_1912",
    }
    # An empty profiler name should signal that the OnboardingDataAssistant
    # is invoked.
    rendered_notebook = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="",
        batch_request=batch_request,
    ).render()

    expected_snippets = (
        expected_imports,
        expected_batch_request,
        expected_assistant_usage,
    )
    for expected in expected_snippets:
        was_found = find_code_in_notebook(rendered_notebook, expected)
        assert was_found, f"Could not find snippet in Notebook: {expected}"