def test_suite_notebook_renderer_render_user_configurable_profiler_configuration(
    mock_data_context: mock.MagicMock,
):
    renderer = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="",  # No name should signal that UserConfigurableProfiler is necessary
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_basic_data_connector",
            "data_asset_name": "Titanic_1912",
        },
    )
    notebook = renderer.render()

    snippets = [
        # Imports
        """import datetime
import pandas as pd
import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.profile.user_configurable_profiler import (
    UserConfigurableProfiler,
)
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError""",
        # Batch request
        """batch_request = {
    "datasource_name": "my_datasource",
    "data_connector_name": "my_basic_data_connector",
    "data_asset_name": "Titanic_1912",
}""",
        # Profiler instantiation/usage
        """profiler = UserConfigurableProfiler(
    profile_dataset=validator,
    excluded_expectations=None,
    ignored_columns=ignored_columns,
    not_null_only=False,
    primary_or_compound_key=False,
    semantic_types_dict=None,
    table_expectations_only=False,
    value_set_threshold="MANY",
)
suite = profiler.build_suite()""",
    ]

    for snippet in snippets:
        assert find_code_in_notebook(
            notebook, snippet
        ), f"Could not find snippet in Notebook: {snippet}"
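

# A minimal sketch of the module-level scaffolding the tests in this file assume (it would
# normally sit at the top of the test module). The import paths and the exact shape of the
# mock_data_context fixture are assumptions based on typical Great Expectations test layout,
# not taken from this excerpt.
from unittest import mock

import pytest

from great_expectations.data_context import DataContext
from great_expectations.render.renderer.suite_profile_notebook_renderer import (
    SuiteProfileNotebookRenderer,
)
from tests.render.test_util import find_code_in_notebook  # assumed helper location


@pytest.fixture
def mock_data_context() -> mock.MagicMock:
    """A bare mock standing in for a DataContext; the renderer only needs attribute access."""
    return mock.MagicMock(spec=DataContext)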
def test_suite_notebook_renderer_render_rule_based_profiler_configuration(
    mock_data_context: mock.MagicMock,
):
    renderer = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="my_profiler",  # Name should signal that RBP from context's profiler store is necessary
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_basic_data_connector",
            "data_asset_name": "Titanic_1912",
        },
    )
    notebook = renderer.render()

    snippets = [
        # Imports
        """\
import datetime
import pandas as pd
import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError
""",
        # Batch request
        """\
batch_request = {
    "datasource_name": "my_datasource",
    "data_connector_name": "my_basic_data_connector",
    "data_asset_name": "Titanic_1912",
}
""",
        # Profiler instantiation/usage
        """\
result = context.run_profiler_with_dynamic_arguments(
    name="my_profiler",
    batch_request=batch_request,
)
validator.expectation_suite = result.get_expectation_suite(
    expectation_suite_name=expectation_suite_name
)""",
    ]

    for snippet in snippets:
        assert find_code_in_notebook(
            notebook, snippet
        ), f"Could not find snippet in Notebook: {snippet}"
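

# The two refactored tests below reference SNIPPETS_RULE_BASED_PROFILER and
# SNIPPETS_USER_CONFIGURABLE_PROFILER, which are not defined in this excerpt. This is a minimal
# sketch of what they are assumed to contain, mirroring the inline snippet lists of the two
# tests above; only the profiler-usage snippet is reproduced here, and the import and
# batch_request snippets would be carried over unchanged.
SNIPPETS_RULE_BASED_PROFILER = [
    """\
result = context.run_profiler_with_dynamic_arguments(
    name="my_profiler",
    batch_request=batch_request,
)
validator.expectation_suite = result.get_expectation_suite(
    expectation_suite_name=expectation_suite_name
)""",
]

SNIPPETS_USER_CONFIGURABLE_PROFILER = [
    """profiler = UserConfigurableProfiler(
    profile_dataset=validator,
    excluded_expectations=None,
    ignored_columns=ignored_columns,
    not_null_only=False,
    primary_or_compound_key=False,
    semantic_types_dict=None,
    table_expectations_only=False,
    value_set_threshold="MANY",
)
suite = profiler.build_suite()""",
]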
def test_suite_notebook_renderer_render_rule_based_profiler_configuration(
    mock_data_context: mock.MagicMock,
):
    renderer = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="my_profiler",  # Name should signal that RBP from context's profiler store is necessary
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_basic_data_connector",
            "data_asset_name": "Titanic_1912",
        },
    )
    notebook = renderer.render()

    snippets = SNIPPETS_RULE_BASED_PROFILER

    for snippet in snippets:
        assert find_code_in_notebook(
            notebook, snippet
        ), f"Could not find snippet in Notebook: {snippet}"
def test_suite_notebook_renderer_render_onboarding_data_assistant_configuration(
    mock_data_context: mock.MagicMock,
):
    renderer = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="",  # No name should signal that OnboardingDataAssistant is invoked
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_basic_data_connector",
            "data_asset_name": "Titanic_1912",
        },
    )
    notebook = renderer.render()

    snippets = SNIPPETS_USER_CONFIGURABLE_PROFILER

    for snippet in snippets:
        assert find_code_in_notebook(
            notebook, snippet
        ), f"Could not find snippet in Notebook: {snippet}"
def _suite_edit_workflow(
    context: DataContext,
    expectation_suite_name: str,
    profile: bool,
    usage_event: str,
    interactive: bool,
    no_jupyter: bool,
    create_if_not_exist: Optional[bool] = False,
    datasource_name: Optional[str] = None,
    batch_request: Optional[
        Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]
    ] = None,
    additional_batch_request_args: Optional[
        Dict[str, Union[str, int, Dict[str, Any]]]
    ] = None,
    suppress_usage_message: Optional[bool] = False,
    assume_yes: Optional[bool] = False,
):
    # The suppress_usage_message flag covers the situation where _suite_edit_workflow() is called
    # by _suite_new_workflow(): in that case the caller sets it to True; otherwise it defaults to
    # False.
    if suppress_usage_message:
        usage_event = None

    suite: ExpectationSuite = toolkit.load_expectation_suite(
        data_context=context,
        expectation_suite_name=expectation_suite_name,
        usage_event=usage_event,
        create_if_not_exist=create_if_not_exist,
    )
    try:
        if interactive or profile:
            batch_request_from_citation_is_up_to_date: bool = True

            batch_request_from_citation: Optional[
                Union[str, Dict[str, Union[str, Dict[str, Any]]]]
            ] = toolkit.get_batch_request_from_citations(expectation_suite=suite)

            if batch_request is not None and isinstance(batch_request, str):
                batch_request = toolkit.get_batch_request_from_json_file(
                    batch_request_json_file_path=batch_request,
                    data_context=context,
                    usage_event=usage_event,
                    suppress_usage_message=suppress_usage_message,
                )

                if batch_request != batch_request_from_citation:
                    batch_request_from_citation_is_up_to_date = False

            if not (
                batch_request
                and isinstance(batch_request, dict)
                and BatchRequest(**batch_request)
            ):
                if (
                    batch_request_from_citation
                    and isinstance(batch_request_from_citation, dict)
                    and BatchRequest(**batch_request_from_citation)
                ):
                    batch_request = copy.deepcopy(batch_request_from_citation)
                else:
                    batch_request = toolkit.get_batch_request_using_datasource_name(
                        data_context=context,
                        datasource_name=datasource_name,
                        usage_event=usage_event,
                        suppress_usage_message=False,
                        additional_batch_request_args=additional_batch_request_args,
                    )

                    if batch_request != batch_request_from_citation:
                        batch_request_from_citation_is_up_to_date = False

            if not batch_request_from_citation_is_up_to_date:
                toolkit.add_citation_with_batch_request(
                    data_context=context,
                    expectation_suite=suite,
                    batch_request=batch_request,
                )

        notebook_name: str = "edit_{}.ipynb".format(expectation_suite_name)
        notebook_path: str = _get_notebook_path(context, notebook_name)

        if profile:
            if not assume_yes:
                toolkit.prompt_profile_to_create_a_suite(
                    data_context=context,
                    expectation_suite_name=expectation_suite_name,
                )

            renderer: SuiteProfileNotebookRenderer = SuiteProfileNotebookRenderer(
                context=context,
                expectation_suite_name=expectation_suite_name,
                batch_request=batch_request,
            )
            renderer.render_to_disk(notebook_file_path=notebook_path)
        else:
            SuiteEditNotebookRenderer.from_data_context(
                data_context=context
            ).render_to_disk(
                suite=suite,
                notebook_file_path=notebook_path,
                batch_request=batch_request,
            )

        if no_jupyter:
            cli_message(
                string=f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )
        else:
            cli_message(
                string="""<green>Opening a notebook for you now to edit your expectation suite!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
            )

        payload: dict = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )

        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                success=True,
            )

        if not no_jupyter:
            toolkit.launch_jupyter_notebook(notebook_path=notebook_path)

    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        ValueError,
        OSError,
        SQLAlchemyError,
    ) as e:
        cli_message(string="<red>{}</red>".format(e))
        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=False
            )
        sys.exit(1)

    except Exception as e:
        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=False
            )
        raise e
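

# Illustrative only: a minimal sketch of how _suite_edit_workflow() might be driven, e.g. from
# the `great_expectations suite edit` CLI command. The usage-event name and the argument values
# are assumptions for the example; the real values come from the click command that wraps this
# helper.
def _example_suite_edit_invocation(context: DataContext) -> None:
    _suite_edit_workflow(
        context=context,
        expectation_suite_name="my_expectation_suite",
        profile=True,  # render the profiling notebook via SuiteProfileNotebookRenderer
        usage_event="cli.suite.edit.begin",  # assumed event name
        interactive=True,
        no_jupyter=True,  # do not auto-launch Jupyter in this sketch
        create_if_not_exist=True,
        datasource_name="my_datasource",
        batch_request=None,  # fall back to the suite's citation or the named datasource
        additional_batch_request_args=None,
        suppress_usage_message=False,
        assume_yes=True,  # skip the interactive confirmation prompt
    )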
def test_suite_notebook_renderer_render_onboarding_data_assistant_configuration(
    mock_data_context: mock.MagicMock,
):
    renderer = SuiteProfileNotebookRenderer(
        context=mock_data_context,
        expectation_suite_name="my_expectation_suite",
        profiler_name="",  # No name should signal that OnboardingDataAssistant is invoked
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_basic_data_connector",
            "data_asset_name": "Titanic_1912",
        },
    )
    notebook = renderer.render()

    snippets = [
        # Imports
        """\
import datetime
import pandas as pd
import great_expectations as ge
import great_expectations.jupyter_ux
from great_expectations.core.batch import BatchRequest
from great_expectations.rule_based_profiler.types.data_assistant_result import (
    DataAssistantResult,
)
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.exceptions import DataContextError""",
        # Batch request
        """batch_request = {
    "datasource_name": "my_datasource",
    "data_connector_name": "my_basic_data_connector",
    "data_asset_name": "Titanic_1912",
}
""",
        # OnboardingDataAssistant instantiation/usage
        """\
data_assistant_result: DataAssistantResult = context.assistants.onboarding.run(
    batch_request=batch_request,
    # include_column_names=include_column_names,
    exclude_column_names=exclude_column_names,
    # include_column_name_suffixes=include_column_name_suffixes,
    # exclude_column_name_suffixes=exclude_column_name_suffixes,
    # semantic_type_filter_module_name=semantic_type_filter_module_name,
    # semantic_type_filter_class_name=semantic_type_filter_class_name,
    # include_semantic_types=include_semantic_types,
    # exclude_semantic_types=exclude_semantic_types,
    # allowed_semantic_types_passthrough=allowed_semantic_types_passthrough,
    cardinality_limit_mode="rel_100",  # case-insenstive (see documentaiton for other options)
    # max_unique_values=max_unique_values,
    # max_proportion_unique=max_proportion_unique,
    # column_value_uniqueness_rule={
    #     "success_ratio": 0.8,
    # },
    # column_value_nullity_rule={
    # },
    # column_value_nonnullity_rule={
    # },
    # numeric_columns_rule={
    #     "false_positive_rate": 0.1,
    #     "random_seed": 43792,
    # },
    # datetime_columns_rule={
    #     "truncate_values": {
    #         "lower_bound": 0,
    #         "upper_bound": 4481049600,  # Friday, January 1, 2112 0:00:00
    #     },
    #     "round_decimals": 0,
    # },
    # text_columns_rule={
    #     "strict_min": True,
    #     "strict_max": True,
    #     "success_ratio": 0.8,
    # },
    # categorical_columns_rule={
    #     "false_positive_rate": 0.1,
    #     "round_decimals": 3,
    # },
)
validator.expectation_suite = data_assistant_result.get_expectation_suite(
    expectation_suite_name=expectation_suite_name
)""",
    ]

    for snippet in snippets:
        assert find_code_in_notebook(
            notebook, snippet
        ), f"Could not find snippet in Notebook: {snippet}"