def test_get_batch_kwargs_for_specific_dataasset(empty_data_context, filesystem_csv):
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    base_directory = str(filesystem_csv)
    context.add_datasource(
        "wow_a_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": base_directory,
            }
        },
    )

    # get_batch_kwargs returns a (datasource_name, batch_kwargs_generator_name,
    # data_asset_name, batch_kwargs) tuple; only the batch_kwargs are asserted here.
    _, _, _, batch_kwargs = get_batch_kwargs(
        context,
        datasource_name=None,
        batch_kwargs_generator_name=None,
        data_asset_name="f1",
        additional_batch_kwargs={},
    )

    expected_batch = {
        "data_asset_name": "f1",
        "datasource": "wow_a_datasource",
        "path": os.path.join(filesystem_csv, "f1.csv"),
    }
    assert batch_kwargs == expected_batch
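
# Illustrative sketch only (not part of the test above): batch_kwargs of the shape asserted
# here are consumed via context.get_batch, the same call used by validation_operator_run
# further down. The path and suite name below are hypothetical.
def _example_load_batch_from_kwargs(context):
    batch_kwargs = {
        "data_asset_name": "f1",
        "datasource": "wow_a_datasource",
        "path": "/tmp/filesystem_csv/f1.csv",  # hypothetical absolute path to the CSV
    }
    # Assumes an expectation suite named "f1.demo" already exists in the context.
    return context.get_batch(batch_kwargs, "f1.demo")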
def _suite_scaffold(suite: str, directory: str, jupyter: bool) -> None:
    usage_event = "cli.suite.scaffold"
    suite_name = suite
    context = toolkit.load_data_context_with_error_handling(directory)
    notebook_filename = f"scaffold_{suite_name}.ipynb"
    notebook_path = _get_notebook_path(context, notebook_filename)

    if suite_name in context.list_expectation_suite_names():
        toolkit.tell_user_suite_exists(suite_name)
        if os.path.isfile(notebook_path):
            cli_message(
                f" - If you wish to adjust your scaffolding, you can open this notebook with jupyter: `{notebook_path}` <red>(Please note that if you run that notebook, you will overwrite your existing suite.)</red>"
            )
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=False,
        )
        sys.exit(1)

    datasource = toolkit.select_datasource(context)
    if datasource is None:
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=False,
        )
        sys.exit(1)

    _suite = context.create_expectation_suite(suite_name)
    _, _, _, batch_kwargs = get_batch_kwargs(context, datasource_name=datasource.name)
    renderer = SuiteScaffoldNotebookRenderer(context, _suite, batch_kwargs)
    renderer.render_to_disk(notebook_path)

    send_usage_message(
        data_context=context,
        event=usage_event,
        api_version="v2",
        success=True,
    )

    if jupyter:
        toolkit.launch_jupyter_notebook(notebook_path)
    else:
        cli_message(
            f"To continue scaffolding this suite, run `jupyter notebook {notebook_path}`"
        )
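
# Minimal sketch (not part of the CLI itself) of how the helper above is expected to be
# driven; the suite name and project directory are hypothetical.
def _example_scaffold_call():
    # Writes scaffold_taxi_trips.ipynb into the project's notebook directory and, because
    # jupyter=False, prints the `jupyter notebook <path>` hint instead of launching Jupyter.
    _suite_scaffold(suite="taxi_trips", directory=".", jupyter=False)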
def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    flag_build_docs=True,
    open_docs=False,
    profiler_configuration="demo",
    data_asset_name=None,
):
    """
    Create a new expectation suite.

    WARNING: the flow and name of this method and its interaction with
    _profile_to_create_a_suite require a serious revisiting.

    :return: a tuple: (success, suite name, profiling_results)
    """
    if generator_asset:
        warnings.warn(
            "The 'generator_asset' argument will be deprecated and renamed to 'data_asset_name'. "
            "Please update code accordingly.",
            DeprecationWarning,
        )
        data_asset_name = generator_asset

    if show_intro_message and not empty_suite:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    data_source = select_datasource(context, datasource_name=datasource_name)
    if data_source is None:
        # select_datasource takes care of displaying an error message, so all that is left
        # here is to exit.
        sys.exit(1)

    datasource_name = data_source.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    if (
        batch_kwargs_generator_name is None
        or data_asset_name is None
        or batch_kwargs is None
    ):
        (
            datasource_name,
            batch_kwargs_generator_name,
            data_asset_name,
            batch_kwargs,
        ) = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            data_asset_name=data_asset_name,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        # In this case, we have "consumed" the additional_batch_kwargs
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        default_expectation_suite_name = _get_default_expectation_suite_name(
            batch_kwargs, data_asset_name
        )
        while True:
            expectation_suite_name = click.prompt(
                "\nName the new Expectation Suite",
                default=default_expectation_suite_name,
            )
            if expectation_suite_name in context.list_expectation_suite_names():
                tell_user_suite_exists(expectation_suite_name)
            else:
                break

    if empty_suite:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name, None

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        data_asset_name,
        profiler_configuration,
    )

    if flag_build_docs:
        build_docs(context, view=False)

    if open_docs:
        attempt_to_open_validation_results_in_data_docs(context, profiling_results)

    return True, expectation_suite_name, profiling_results
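
# Minimal sketch, assuming an existing DataContext with a configured datasource; the
# datasource and suite names are hypothetical. The return value is the tuple documented
# in the docstring above: (success, suite name, profiling_results).
def _example_create_suite(context):
    success, suite_name, profiling_results = create_expectation_suite(
        context,
        datasource_name="wow_a_datasource",  # hypothetical datasource name
        expectation_suite_name="f1.demo",    # hypothetical suite name
        empty_suite=True,                    # create an empty suite instead of profiling
    )
    return suite_name if success else None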
def validation_operator_run(name, run_name, validation_config_file, suite, directory):
    # Note though the long lines here aren't pythonic, they look best if Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development):

        Specify the name of the validation operator using the --name argument and the name of the expectation suite using the --suite argument.

        The cli will help you specify the batch of data that you want to validate interactively.


    2. Non-interactive (good for production):

        Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

        Learn how to create a validation config file here:
        https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

        This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.


    To learn more about validation operators, go here:
    https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message(f"Failed to process <red>{err.message}</red>")
        sys.exit(1)

    try:
        if validation_config_file is not None:
            try:
                with open(validation_config_file) as f:
                    validation_config = json.load(f)
            except (OSError, json_parse_exception) as e:
                cli_message(
                    f"Failed to process the --validation_config_file argument: <red>{e}</red>"
                )
                toolkit.send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            validation_config_error_message = _validate_valdiation_config(
                validation_config
            )
            if validation_config_error_message is not None:
                cli_message(
                    "<red>The validation config in {:s} is misconfigured: {:s}</red>".format(
                        validation_config_file, validation_config_error_message
                    )
                )
                toolkit.send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

        else:
            if suite is None:
                cli_message(
                    """
Please use --suite argument to specify the name of the expectation suite.
Call `great_expectations suite list` command to list the expectation suites in your project.
"""
                )
                toolkit.send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(0)

            suite = toolkit.load_expectation_suite(
                context, suite, "cli.validation_operator.run"
            )

            if name is None:
                cli_message(
                    """
Please use --name argument to specify the name of the validation operator.
Call `great_expectations validation-operator list` command to list the operators in your project.
"""
                )
                toolkit.send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)
            else:
                if name not in context.list_validation_operator_names():
                    cli_message(
                        f"""
Could not find a validation operator {name}.
Call `great_expectations validation-operator list` command to list the operators in your project.
""" ) toolkit.send_usage_message( data_context=context, event="cli.validation_operator.run", success=False, ) sys.exit(1) batch_kwargs = None cli_message( """ Let us help you specify the batch of data your want the validation operator to validate.""" ) try: data_source = toolkit.select_datasource(context) except ValueError as ve: cli_message(f"<red>{ve}</red>") toolkit.send_usage_message( data_context=context, event="cli.validation_operator.run", success=False, ) sys.exit(1) if not data_source: cli_message("<red>No datasources found in the context.</red>") toolkit.send_usage_message( data_context=context, event="cli.validation_operator.run", success=False, ) sys.exit(1) if batch_kwargs is None: ( datasource_name, batch_kwargs_generator, data_asset, batch_kwargs, ) = get_batch_kwargs( context, datasource_name=data_source.name, batch_kwargs_generator_name=None, data_asset_name=None, additional_batch_kwargs=None, ) validation_config = { "validation_operator_name": name, "batches": [ { "batch_kwargs": batch_kwargs, "expectation_suite_names": [suite.expectation_suite_name], } ], } try: validation_operator_name = validation_config["validation_operator_name"] batches_to_validate = [] for entry in validation_config["batches"]: for expectation_suite_name in entry["expectation_suite_names"]: batch = context.get_batch( entry["batch_kwargs"], expectation_suite_name ) batches_to_validate.append(batch) if run_name is None: run_name = datetime.datetime.now(datetime.timezone.utc).strftime( "%Y%m%dT%H%M%S.%fZ" ) run_id = RunIdentifier(run_name=run_name) if suite is None: results = context.run_validation_operator( validation_operator_name, assets_to_validate=batches_to_validate, run_id=run_id, ) else: if suite.evaluation_parameters is None: results = context.run_validation_operator( validation_operator_name, assets_to_validate=batches_to_validate, run_id=run_id, ) else: results = context.run_validation_operator( validation_operator_name, assets_to_validate=batches_to_validate, run_id=run_id, evaluation_parameters=suite.evaluation_parameters, ) except (ge_exceptions.DataContextError, OSError, SQLAlchemyError) as e: cli_message(f"<red>{e}</red>") toolkit.send_usage_message( data_context=context, event="cli.validation_operator.run", success=False ) sys.exit(1) if not results["success"]: cli_message("Validation failed!") toolkit.send_usage_message( data_context=context, event="cli.validation_operator.run", success=True ) sys.exit(1) else: cli_message("Validation succeeded!") toolkit.send_usage_message( data_context=context, event="cli.validation_operator.run", success=True ) sys.exit(0) except Exception as e: toolkit.send_usage_message( data_context=context, event="cli.validation_operator.run", success=False ) raise e
def _suite_edit(
    suite,
    datasource,
    directory,
    jupyter,
    batch_kwargs,
    usage_event,
    suppress_usage_message=False,
):
    # The suppress_usage_message flag is for the situation where _suite_edit is called by _suite_new().
    # When called by _suite_new(), the flag will be set to True; otherwise it defaults to False.
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    context = toolkit.load_data_context_with_error_handling(directory)

    try:
        suite = toolkit.load_expectation_suite(context, suite, usage_event)
        citations = suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs_json:
            try:
                batch_kwargs = json.loads(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                _batch = toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                cli_message(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        je
                    )
                )
                if not suppress_usage_message:
                    send_usage_message(
                        data_context=context,
                        event=usage_event,
                        api_version="v2",
                        success=False,
                    )
                sys.exit(1)
            except ge_exceptions.DataContextError:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                if not suppress_usage_message:
                    send_usage_message(
                        data_context=context,
                        event=usage_event,
                        api_version="v2",
                        success=False,
                    )
                sys.exit(1)
            except ValueError as ve:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        ve
                    )
                )
                if not suppress_usage_message:
                    send_usage_message(
                        data_context=context,
                        event=usage_event,
                        api_version="v2",
                        success=False,
                    )
                sys.exit(1)
        elif citations:
            citation = citations[-1]
            batch_kwargs = citation.get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                """
A batch of data is required to edit the suite - let's help you to specify it."""
            )

            additional_batch_kwargs = None
            try:
                data_source = toolkit.select_datasource(
                    context, datasource_name=datasource
                )
            except ValueError as ve:
                cli_message(f"<red>{ve}</red>")
                send_usage_message(
                    data_context=context,
                    event=usage_event,
                    api_version="v2",
                    success=False,
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                if not suppress_usage_message:
                    send_usage_message(
                        data_context=context,
                        event=usage_event,
                        api_version="v2",
                        success=False,
                    )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    data_asset_name=None,
                    additional_batch_kwargs=additional_batch_kwargs,
                )

        notebook_name = f"edit_{suite.expectation_suite_name}.ipynb"
        notebook_path = _get_notebook_path(context, notebook_name)
        SuiteEditNotebookRenderer.from_data_context(context).render_to_disk(
            suite, notebook_path, batch_kwargs
        )

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )

        if not suppress_usage_message:
            send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                api_version="v2",
                success=True,
            )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)

    except Exception as e:
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=False,
        )
        raise e
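
# Minimal sketch of driving _suite_edit with explicit --batch-kwargs JSON, which the function
# above parses with json.loads; the suite name, path, and datasource are hypothetical, and
# "cli.suite.edit" is assumed to be the usage event name passed by the calling command.
def _example_edit_call():
    batch_kwargs_json = json.dumps(
        {"path": "data/f1.csv", "datasource": "wow_a_datasource"}
    )
    _suite_edit(
        suite="f1.demo",
        datasource=None,
        directory=".",
        jupyter=False,
        batch_kwargs=batch_kwargs_json,
        usage_event="cli.suite.edit",
    )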