def checkpoint_new(checkpoint, suite, directory, datasource):
    """Create a new checkpoint for easy deployments. (Experimental)"""
    suite_name = suite
    usage_event = "cli.checkpoint.new"
    context = toolkit.load_data_context_with_error_handling(directory)
    _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
    suite: ExpectationSuite = toolkit.load_expectation_suite(
        context, suite_name, usage_event
    )
    datasource = toolkit.select_datasource(context, datasource_name=datasource)
    if datasource is None:
        send_usage_message(context, usage_event, success=False)
        sys.exit(1)
    _, _, _, batch_kwargs = toolkit.get_batch_kwargs(context, datasource.name)

    template = _load_checkpoint_yml_template()
    # This picky update helps template comments stay in place
    template["batches"][0]["batch_kwargs"] = dict(batch_kwargs)
    template["batches"][0]["expectation_suite_names"] = [suite.expectation_suite_name]

    checkpoint_file = _write_checkpoint_to_disk(context, template, checkpoint)
    cli_message(
        f"""<green>A checkpoint named `{checkpoint}` was added to your project!</green>
  - To edit this checkpoint edit the checkpoint file: {checkpoint_file}
  - To run this checkpoint run `great_expectations checkpoint run {checkpoint}`"""
    )
    send_usage_message(context, usage_event, success=True)
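
# Illustrative only: once the two template assignments above run, the checkpoint
# config has roughly this shape (the keys mirror how `checkpoint_run` reads the
# file below; the operator name and all values are placeholders, not project
# settings).
_EXAMPLE_CHECKPOINT_TEMPLATE = {
    "validation_operator_name": "action_list_operator",
    "batches": [
        {
            "batch_kwargs": {"path": "/data/my_file.csv", "datasource": "my_datasource"},
            "expectation_suite_names": ["my_suite.warning"],
        }
    ],
}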
def _tap_new(suite, tap_filename, directory, usage_event, datasource=None):
    context = toolkit.load_data_context_with_error_handling(directory)
    try:
        _validate_tap_filename(tap_filename)
        context_directory = context.root_directory
        datasource = _get_datasource(context, datasource)
        suite = toolkit.load_expectation_suite(context, suite, usage_event)
        _, _, _, batch_kwargs = get_batch_kwargs(context, datasource.name)

        tap_filename = _write_tap_file_to_disk(
            batch_kwargs, context_directory, suite, tap_filename
        )
        cli_message(
            f"""\
<green>A new tap has been generated!</green>
To run this tap, run: <green>python {tap_filename}</green>
You can edit this script or place this code snippet in your pipeline."""
        )
        send_usage_message(data_context=context, event=usage_event, success=True)
    except Exception as e:
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise e
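
# A rough, hypothetical sketch of the script `_write_tap_file_to_disk` generates
# (the real template lives elsewhere). It illustrates the "place this code
# snippet in your pipeline" idea: load the context, fetch the saved batch,
# validate it, and exit non-zero on failure. All paths and names are placeholders.
#
#     import sys
#     from great_expectations.data_context import DataContext
#
#     context = DataContext("/path/to/great_expectations")
#     batch = context.get_batch(
#         {"path": "/data/my_file.csv", "datasource": "my_datasource"}, "my_suite"
#     )
#     results = context.run_validation_operator(
#         "action_list_operator", assets_to_validate=[batch]
#     )
#     sys.exit(0 if results["success"] else 1)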
def checkpoint_run(checkpoint, directory):
    """Run a checkpoint. (Experimental)"""
    context = toolkit.load_data_context_with_error_handling(directory)
    usage_event = "cli.checkpoint.run"

    checkpoint_config = toolkit.load_checkpoint(context, checkpoint, usage_event)
    checkpoint_file = f"great_expectations/checkpoints/{checkpoint}.yml"

    # TODO loading batches will move into DataContext eventually
    batches_to_validate = []
    for batch in checkpoint_config["batches"]:
        _validate_at_least_one_suite_is_listed(context, batch, checkpoint_file)
        batch_kwargs = batch["batch_kwargs"]
        for suite_name in batch["expectation_suite_names"]:
            suite = toolkit.load_expectation_suite(context, suite_name, usage_event)
            try:
                # Distinct name so the loop variable `batch` is not shadowed
                loaded_batch = toolkit.load_batch(context, suite, batch_kwargs)
            except (FileNotFoundError, SQLAlchemyError, OSError, DataContextError) as e:
                toolkit.exit_with_failure_message_and_stats(
                    context,
                    usage_event,
                    f"""<red>There was a problem loading a batch:
  - Batch: {batch_kwargs}
  - {e}
  - Please verify these batch kwargs in the checkpoint file: `{checkpoint_file}`</red>""",
                )
            batches_to_validate.append(loaded_batch)

    try:
        results = context.run_validation_operator(
            checkpoint_config["validation_operator_name"],
            assets_to_validate=batches_to_validate,
            # TODO prepare for new RunID - checkpoint name and timestamp
            # run_id=RunID(checkpoint)
        )
    except DataContextError as e:
        toolkit.exit_with_failure_message_and_stats(context, usage_event, f"<red>{e}</red>")

    if not results["success"]:
        cli_message("Validation failed!")
        send_usage_message(context, event=usage_event, success=True)
        print_validation_operator_results_details(results)
        sys.exit(1)

    cli_message("Validation succeeded!")
    send_usage_message(context, event=usage_event, success=True)
    print_validation_operator_results_details(results)
    sys.exit(0)
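
# Typical invocation, assuming a checkpoint created with `checkpoint new`:
#
#     great_expectations checkpoint run my_checkpoint
#
# The command exits 0 when every batch validates successfully and 1 otherwise.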
def checkpoint_new(checkpoint, suite, directory, datasource, legacy):
    """Create a new checkpoint for easy deployments. (Experimental)"""
    if legacy:
        suite_name = suite
        usage_event = "cli.checkpoint.new"
        context = toolkit.load_data_context_with_error_handling(directory)
        ge_config_version = context.get_config().config_version
        if ge_config_version >= 3:
            cli_message(
                """<red>The `checkpoint new` CLI command is not yet implemented for GE config versions >= 3.</red>"""
            )
            send_usage_message(context, usage_event, success=False)
            sys.exit(1)

        _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
        suite: ExpectationSuite = toolkit.load_expectation_suite(
            context, suite_name, usage_event
        )
        datasource = toolkit.select_datasource(context, datasource_name=datasource)
        if datasource is None:
            send_usage_message(context, usage_event, success=False)
            sys.exit(1)
        _, _, _, batch_kwargs = toolkit.get_batch_kwargs(context, datasource.name)

        _ = context.add_checkpoint(
            name=checkpoint,
            **{
                "class_name": "LegacyCheckpoint",
                "validation_operator_name": "action_list_operator",
                "batches": [
                    {
                        "batch_kwargs": dict(batch_kwargs),
                        "expectation_suite_names": [suite.expectation_suite_name],
                    }
                ],
            },
        )

        cli_message(
            f"""<green>A checkpoint named `{checkpoint}` was added to your project!</green>
  - To run this checkpoint run `great_expectations checkpoint run {checkpoint}`"""
        )
        send_usage_message(context, usage_event, success=True)
    # TODO: <Rob>Rob</Rob> Add flow for new style checkpoints
    else:
        pass
def validation_operator_run(name, run_name, validation_config_file, suite, directory):
    # Note: though the long lines here aren't pythonic, they look best if Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development): Specify the name of the validation operator using the --name argument and the name of the expectation suite using the --suite argument. The CLI will help you specify the batch of data that you want to validate interactively.

    2. Non-interactive (good for production): Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

    Learn how to create a validation config file here: https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

    This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.

    To learn more about validation operators, go here: https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("Failed to process <red>{}</red>".format(err.message))
        sys.exit(1)

    try:
        if validation_config_file is not None:
            try:
                with open(validation_config_file) as f:
                    validation_config = json.load(f)
            except (OSError, json_parse_exception) as e:
                cli_message(
                    f"Failed to process the --validation_config_file argument: <red>{e}</red>"
                )
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            validation_config_error_message = _validate_valdiation_config(
                validation_config
            )
            if validation_config_error_message is not None:
                cli_message(
                    "<red>The validation config in {:s} is misconfigured: {:s}</red>".format(
                        validation_config_file, validation_config_error_message
                    )
                )
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)
        else:
            if suite is None:
                cli_message(
                    """
Please use --suite argument to specify the name of the expectation suite.
Call `great_expectations suite list` command to list the expectation suites in your project.
"""
                )
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            suite = toolkit.load_expectation_suite(
                context, suite, "cli.validation_operator.run"
            )

            if name is None:
                cli_message(
                    """
Please use --name argument to specify the name of the validation operator.
Call `great_expectations validation-operator list` command to list the operators in your project.
"""
                )
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)
            elif name not in context.list_validation_operator_names():
                cli_message(
                    f"""
Could not find a validation operator {name}.
Call `great_expectations validation-operator list` command to list the operators in your project.
"""
                )
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            batch_kwargs = None

            cli_message(
                """
Let us help you specify the batch of data you want the validation operator to validate."""
            )

            try:
                data_source = toolkit.select_datasource(context)
            except ValueError as ve:
                cli_message("<red>{}</red>".format(ve))
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                send_usage_message(
                    data_context=context,
                    event="cli.validation_operator.run",
                    success=False,
                )
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    data_asset_name=None,
                    additional_batch_kwargs=None,
                )

            validation_config = {
                "validation_operator_name": name,
                "batches": [
                    {
                        "batch_kwargs": batch_kwargs,
                        "expectation_suite_names": [suite.expectation_suite_name],
                    }
                ],
            }

        try:
            validation_operator_name = validation_config["validation_operator_name"]
            batches_to_validate = []
            for entry in validation_config["batches"]:
                for expectation_suite_name in entry["expectation_suite_names"]:
                    batch = context.get_batch(
                        entry["batch_kwargs"], expectation_suite_name
                    )
                    batches_to_validate.append(batch)

            if run_name is None:
                run_name = datetime.datetime.now(datetime.timezone.utc).strftime(
                    "%Y%m%dT%H%M%S.%fZ"
                )
            run_id = RunIdentifier(run_name=run_name)

            # Both "no suite" and "suite without evaluation parameters" run the
            # operator the same way, so the two cases are collapsed here.
            if suite is None or suite.evaluation_parameters is None:
                results = context.run_validation_operator(
                    validation_operator_name,
                    assets_to_validate=batches_to_validate,
                    run_id=run_id,
                )
            else:
                results = context.run_validation_operator(
                    validation_operator_name,
                    assets_to_validate=batches_to_validate,
                    run_id=run_id,
                    evaluation_parameters=suite.evaluation_parameters,
                )
        except (ge_exceptions.DataContextError, OSError, SQLAlchemyError) as e:
            cli_message("<red>{}</red>".format(e))
            send_usage_message(
                data_context=context, event="cli.validation_operator.run", success=False
            )
            sys.exit(1)

        if not results["success"]:
            cli_message("Validation failed!")
            send_usage_message(
                data_context=context, event="cli.validation_operator.run", success=True
            )
            sys.exit(1)
        else:
            cli_message("Validation succeeded!")
            send_usage_message(
                data_context=context, event="cli.validation_operator.run", success=True
            )
            sys.exit(0)
    except Exception as e:
        send_usage_message(
            data_context=context, event="cli.validation_operator.run", success=False
        )
        raise e
def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs, usage_event):
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    context = toolkit.load_data_context_with_error_handling(directory)

    try:
        suite = toolkit.load_expectation_suite(context, suite, usage_event)
        citations = suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs_json:
            try:
                batch_kwargs = json.loads(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                _batch = toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                cli_message(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(je)
                )
                send_usage_message(data_context=context, event=usage_event, success=False)
                sys.exit(1)
            except ge_exceptions.DataContextError:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
                send_usage_message(data_context=context, event=usage_event, success=False)
                sys.exit(1)
            except ValueError as ve:
                cli_message(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(ve)
                )
                send_usage_message(data_context=context, event=usage_event, success=False)
                sys.exit(1)
        elif citations:
            citation = citations[-1]
            batch_kwargs = citation.get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                """
A batch of data is required to edit the suite - let's help you to specify it."""
            )

            additional_batch_kwargs = None
            try:
                data_source = toolkit.select_datasource(context, datasource_name=datasource)
            except ValueError as ve:
                cli_message("<red>{}</red>".format(ve))
                send_usage_message(data_context=context, event=usage_event, success=False)
                sys.exit(1)

            if not data_source:
                cli_message("<red>No datasources found in the context.</red>")
                send_usage_message(data_context=context, event=usage_event, success=False)
                sys.exit(1)

            if batch_kwargs is None:
                (
                    datasource_name,
                    batch_kwargs_generator,
                    data_asset,
                    batch_kwargs,
                ) = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    additional_batch_kwargs=additional_batch_kwargs,
                )

        notebook_name = "edit_{}.ipynb".format(suite.expectation_suite_name)
        notebook_path = _get_notebook_path(context, notebook_name)
        SuiteEditNotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )
        send_usage_message(
            data_context=context, event=usage_event, event_payload=payload, success=True
        )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)
    except Exception as e:
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise e
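
# Example of a --batch_kwargs value this function accepts: a JSON object with
# whatever keys the datasource understands. The path and names here are
# placeholders, and a --datasource argument overrides the "datasource" key.
_EXAMPLE_BATCH_KWARGS_JSON = '{"path": "/data/my_file.csv", "datasource": "my_datasource"}'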
def _suite_edit_workflow(
    context: DataContext,
    expectation_suite_name: str,
    profile: bool,
    usage_event: str,
    interactive: bool,
    no_jupyter: bool,
    create_if_not_exist: Optional[bool] = False,
    datasource_name: Optional[str] = None,
    batch_request: Optional[Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]] = None,
    additional_batch_request_args: Optional[Dict[str, Union[str, int, Dict[str, Any]]]] = None,
    suppress_usage_message: Optional[bool] = False,
    assume_yes: Optional[bool] = False,
):
    # The suppress_usage_message flag is for the situation where _suite_edit_workflow
    # is called by _suite_new_workflow(). When called by _suite_new_workflow(), the
    # flag is set to True; otherwise it defaults to False.
    if suppress_usage_message:
        usage_event = None

    suite: ExpectationSuite = toolkit.load_expectation_suite(
        data_context=context,
        expectation_suite_name=expectation_suite_name,
        usage_event=usage_event,
        create_if_not_exist=create_if_not_exist,
    )

    try:
        if interactive or profile:
            batch_request_from_citation_is_up_to_date: bool = True

            batch_request_from_citation: Optional[
                Union[str, Dict[str, Union[str, Dict[str, Any]]]]
            ] = toolkit.get_batch_request_from_citations(expectation_suite=suite)

            if batch_request is not None and isinstance(batch_request, str):
                batch_request = toolkit.get_batch_request_from_json_file(
                    batch_request_json_file_path=batch_request,
                    data_context=context,
                    usage_event=usage_event,
                    suppress_usage_message=suppress_usage_message,
                )
                if batch_request != batch_request_from_citation:
                    batch_request_from_citation_is_up_to_date = False

            if not (
                batch_request
                and isinstance(batch_request, dict)
                and BatchRequest(**batch_request)
            ):
                if (
                    batch_request_from_citation
                    and isinstance(batch_request_from_citation, dict)
                    and BatchRequest(**batch_request_from_citation)
                ):
                    batch_request = copy.deepcopy(batch_request_from_citation)
                else:
                    batch_request = toolkit.get_batch_request_using_datasource_name(
                        data_context=context,
                        datasource_name=datasource_name,
                        usage_event=usage_event,
                        suppress_usage_message=False,
                        additional_batch_request_args=additional_batch_request_args,
                    )
                    if batch_request != batch_request_from_citation:
                        batch_request_from_citation_is_up_to_date = False

            if not batch_request_from_citation_is_up_to_date:
                toolkit.add_citation_with_batch_request(
                    data_context=context,
                    expectation_suite=suite,
                    batch_request=batch_request,
                )

        notebook_name: str = "edit_{}.ipynb".format(expectation_suite_name)
        notebook_path: str = _get_notebook_path(context, notebook_name)

        if profile:
            if not assume_yes:
                toolkit.prompt_profile_to_create_a_suite(
                    data_context=context,
                    expectation_suite_name=expectation_suite_name,
                )

            renderer: SuiteProfileNotebookRenderer = SuiteProfileNotebookRenderer(
                context=context,
                expectation_suite_name=expectation_suite_name,
                batch_request=batch_request,
            )
            renderer.render_to_disk(notebook_file_path=notebook_path)
        else:
            SuiteEditNotebookRenderer.from_data_context(
                data_context=context
            ).render_to_disk(
                suite=suite,
                notebook_file_path=notebook_path,
                batch_request=batch_request,
            )

        if no_jupyter:
            cli_message(
                string=f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )
        else:
            cli_message(
                string="""<green>Opening a notebook for you now to edit your expectation suite!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
            )

        payload: dict = edit_expectation_suite_usage_statistics(
            data_context=context,
            expectation_suite_name=suite.expectation_suite_name,
        )

        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context,
                event=usage_event,
                event_payload=payload,
                success=True,
            )

        if not no_jupyter:
            toolkit.launch_jupyter_notebook(notebook_path=notebook_path)
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        ValueError,
        OSError,
        SQLAlchemyError,
    ) as e:
        cli_message(string="<red>{}</red>".format(e))
        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=False
            )
        sys.exit(1)
    except Exception as e:
        if not suppress_usage_message:
            toolkit.send_usage_message(
                data_context=context, event=usage_event, success=False
            )
        raise e
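
# A minimal sketch of the dict form of `batch_request` accepted above: it must be
# valid keyword arguments for BatchRequest. The three values below are
# placeholders, not defaults.
_EXAMPLE_BATCH_REQUEST = {
    "datasource_name": "my_datasource",
    "data_connector_name": "default_inferred_data_connector_name",
    "data_asset_name": "my_table",
}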