def get_batch_request_from_json_file(
    batch_request_json_file_path: str,
    data_context: DataContext,
    usage_event: Optional[str] = None,
    suppress_usage_message: bool = False,
) -> Optional[Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]]:
    """Load a batch_request dict from a JSON file and validate it via BatchRequest.

    On validation failure, prints guidance, optionally records a failed usage
    event, and exits the CLI with status 1.
    """
    raw_request: Optional[
        Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]
    ] = load_json_file_into_dict(
        filepath=batch_request_json_file_path,
        data_context=data_context,
        usage_event=usage_event,
    )
    try:
        # Round-trip through BatchRequest to validate structure/keys.
        validated_request = BatchRequest(**raw_request).to_json_dict()
    except TypeError as e:
        cli_message(
            string="<red>Please check that your batch_request is valid and is able to load a batch.</red>"
        )
        cli_message(string=f"<red>{e}</red>")
        if not suppress_usage_message:
            send_usage_message(
                data_context=data_context,
                event=usage_event,
                success=False,
            )
        sys.exit(1)
    return validated_request
def checkpoint_list(directory) -> None:
    """List configured checkpoints. (Experimental)"""
    context = toolkit.load_data_context_with_error_handling(directory)
    checkpoint_names = context.list_checkpoints()
    if not checkpoint_names:
        # Nothing configured: explain how to create one, record the event, stop.
        cli_message(
            "No checkpoints found.\n"
            " - Use the command `great_expectations checkpoint new` to create one."
        )
        send_usage_message(
            data_context=context,
            event="cli.checkpoint.list",
            api_version="v2",
            success=True,
        )
        sys.exit(0)
    count = len(checkpoint_names)
    suffix = "s" if count > 1 else ""
    cli_message_list(
        [f" - <cyan>{name}</cyan>" for name in checkpoint_names],
        list_intro_string=f"Found {count} checkpoint{suffix}.",
    )
    send_usage_message(
        data_context=context,
        event="cli.checkpoint.list",
        api_version="v2",
        success=True,
    )
def checkpoint(ctx: click.Context) -> None:
    """
    Checkpoint operations

    A Checkpoint is a bundle of one or more batches of data with one or more Expectation Suites.

    A Checkpoint can be as simple as one batch of data paired with one Expectation Suite.

    A Checkpoint can be as complex as many batches of data across different datasources paired with one or more Expectation Suites each.
    """
    ctx.obj.data_context = ctx.obj.get_data_context_from_config_file()
    # Resolve the paired begin/end usage-stats event names for the invoked
    # subcommand; the end event is stashed on the context for the subcommand.
    begin_event, end_event = UsageStatsEvents.get_cli_begin_and_end_event_names(
        noun="checkpoint",
        verb=ctx.invoked_subcommand,
    )
    send_usage_message(
        data_context=ctx.obj.data_context,
        event=begin_event,
        success=True,
    )
    ctx.obj.usage_event_end = end_event
def checkpoint_list(ctx: click.Context) -> None:
    """List configured checkpoints."""
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    checkpoint_names: List[str] = context.list_checkpoints()
    if not checkpoint_names:
        # Nothing configured: explain how to create one, record the event, stop.
        cli_message(
            "No Checkpoints found.\n"
            " - Use the command `great_expectations checkpoint new` to create one."
        )
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
        sys.exit(0)
    count: int = len(checkpoint_names)
    suffix: str = "s" if count > 1 else ""
    cli_message_list(
        [f" - <cyan>{name}</cyan>" for name in checkpoint_names],
        list_intro_string=f"Found {count} Checkpoint{suffix}.",
    )
    send_usage_message(
        data_context=context,
        event=usage_event_end,
        success=True,
    )
def checkpoint_delete(ctx: click.Context, checkpoint: str) -> None:
    """Delete a Checkpoint."""
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    try:
        # delete_checkpoint handles the confirmation prompt (unless assume_yes).
        toolkit.delete_checkpoint(
            context=context,
            checkpoint_name=checkpoint,
            usage_event=usage_event_end,
            assume_yes=ctx.obj.assume_yes,
        )
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
    except Exception as e:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message=f"<red>{e}</red>",
        )
        return
    cli_message(f'Checkpoint "{checkpoint}" deleted.')
    sys.exit(0)
def docs_list(ctx):
    """List known Data Docs sites."""
    context = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    docs_sites_url_dicts = context.get_docs_sites_urls()
    try:
        if not docs_sites_url_dicts:
            cli_message("No Data Docs sites found")
        else:
            entries = []
            for site in docs_sites_url_dicts:
                site_name = site["site_name"]
                # A site without a URL is configured but has not been built yet.
                url = site.get("site_url") or (
                    f"site configured but does not exist. Run the following command to build site: great_expectations "
                    f"docs build --site-name {site_name}"
                )
                entries.append(f" - <cyan>{site_name}</cyan>: {url}")
            cli_message_list(entries, _build_intro_string(entries))
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
    except Exception as e:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message=f"<red>{e}</red>",
        )
        return
def docs_list(directory) -> None:
    """List known Data Docs Sites."""
    context = toolkit.load_data_context_with_error_handling(directory)
    entries = []
    for site in context.get_docs_sites_urls():
        site_name = site["site_name"]
        # A site without a URL is configured but has not been built yet.
        url = site.get("site_url") or (
            f"site configured but does not exist. Run the following command to build site: great_expectations "
            f"docs build --site-name {site_name}"
        )
        entries.append(f" - <cyan>{site_name}</cyan>: {url}")
    if not entries:
        cli_message("No Data Docs sites found")
    else:
        cli_message_list(entries, _build_intro_string(entries))
    send_usage_message(
        data_context=context,
        event="cli.docs.list",
        api_version="v2",
        success=True,
    )
def clean_data_docs(directory, site_name=None, all=None) -> None:
    """Delete data docs

    Requires either --all or --site_name. Emits a success usage event after a
    clean, or a failure event if cleaning raises (the exception still
    propagates).
    """
    context = toolkit.load_data_context_with_error_handling(directory)
    if site_name is None and all is None:
        cli_message(
            "<red>{}</red>".format(
                "Please specify --all to remove all sites or specify a specific site using "
                "--site_name"
            )
        )
        sys.exit(1)
    failed = True
    try:
        # BUGFIX: the original had no try/except, so an exception from
        # clean_data_docs propagated before the `if failed` branch could send
        # the failure usage event — that branch was unreachable dead code.
        context.clean_data_docs(site_name=site_name)
        failed = False
    finally:
        if failed and context is not None:
            send_usage_message(
                data_context=context,
                event="cli.docs.clean",
                api_version="v2",
                success=False,
            )
    if context is not None:
        send_usage_message(
            data_context=context,
            event="cli.docs.clean",
            api_version="v2",
            success=True,
        )
        cli_message("<green>Cleaned data docs</green>")
def docs_clean(ctx, site_name=None, all_sites=False):
    """
    Remove all files from a Data Docs site.

    This is a useful first step if you wish to completely re-build a site from scratch.
    """
    context = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    # Exactly one of --site-name / --all must be supplied.
    neither_given = site_name is None and all_sites is False
    both_given = bool(site_name and all_sites)
    if neither_given or both_given:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message="<red>Please specify either --all to clean all sites or a specific site using --site-name</red>",
        )
    try:
        # Passing site_name=None cleans every configured site.
        context.clean_data_docs(site_name=site_name)
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
        cli_message("<green>Cleaned data docs</green>")
    except DataContextError as de:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message=f"<red>{de}</red>",
        )
def docs_build(ctx, site_name=None, no_view=False):
    """Build Data Docs for a project."""
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    if site_name is None:
        # No site requested: build every configured site.
        sites_to_build = context.get_site_names()
    elif site_name not in context.get_site_names():
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message=f"<red>The specified site name `{site_name}` does not exist in this project.</red>",
        )
    else:
        sites_to_build = [site_name]
    build_docs(
        context,
        usage_stats_event=usage_event_end,
        site_names=sites_to_build,
        view=not no_view,
        assume_yes=ctx.obj.assume_yes,
    )
    send_usage_message(
        data_context=context,
        event=usage_event_end,
        success=True,
    )
def suite_delete(suite, directory):
    """
    Delete an expectation suite from the expectation store.
    """
    usage_event = "cli.suite.delete"
    context = toolkit.load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context,
            usage_event,
            # BUGFIX: the opening tag was "</red>" (a closing tag), which would
            # not colorize the message correctly.
            "<red>No expectation suites found in the project.</red>",
        )
    if suite not in suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"No expectation suite named {suite} found."
        )
    context.delete_expectation_suite(suite)
    cli_message(f"Deleted the expectation suite named: {suite}")
    send_usage_message(
        data_context=context,
        event=usage_event,
        api_version="v2",
        success=True,
    )
def datasource_list(ctx: click.Context) -> None:
    """List known Datasources."""
    context = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    try:
        datasources = context.list_datasources()
        cli_message(_build_datasource_intro_string(datasources))
        for ds in datasources:
            cli_message("")
            # Only surface the name and class of each datasource.
            cli_message_dict({key: ds[key] for key in ("name", "class_name")})
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
    except Exception as e:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message=f"<red>{e}</red>",
        )
        return
def upgrade_project_strictly_multiple_versions_increment(
    directory: str, ge_config_version: float, from_cli_upgrade_command: bool = False
) -> Optional[DataContext]:
    """Upgrade a project whose config version lags the current one by more than
    one increment; returns the upgraded DataContext, or None if no upgrade ran.
    """
    # Look up the upgrade helper keyed by the major config version, if any.
    if ge_config_version:
        upgrade_helper_class = GE_UPGRADE_HELPER_VERSION_MAP.get(int(ge_config_version))
    else:
        upgrade_helper_class = None
    context: Optional[DataContext] = None
    if upgrade_helper_class and int(ge_config_version) < CURRENT_GE_CONFIG_VERSION:
        upgrade_project(
            context_root_dir=directory,
            ge_config_version=ge_config_version,
            from_cli_upgrade_command=from_cli_upgrade_command,
        )
        context = DataContext(context_root_dir=directory)
        # noinspection PyBroadException
        try:
            send_usage_message(
                data_context=context,
                event="cli.project.upgrade.end",
                success=True,
            )
        except Exception:
            # Don't fail for usage stats
            pass
    return context
def get_batch_request_using_datasource_name(
    data_context: DataContext,
    datasource_name: Optional[str] = None,
    usage_event: Optional[str] = None,
    suppress_usage_message: bool = False,
    additional_batch_request_args: Optional[
        Dict[str, Union[str, int, Dict[str, Any]]]
    ] = None,
) -> Optional[Union[str, Dict[str, Union[str, int, Dict[str, Any]]]]]:
    """Interactively build a batch_request for the (possibly prompted) datasource.

    Exits the CLI with status 1 (after optionally recording a failed usage
    event) when no datasource can be resolved.
    """
    cli_message(
        string="\nA batch of data is required to edit the suite - let's help you to specify it.\n"
    )
    datasource: BaseDatasource = select_datasource(
        context=data_context, datasource_name=datasource_name
    )
    if not datasource:
        cli_message(string="<red>No datasources found in the context.</red>")
        if not suppress_usage_message:
            send_usage_message(
                data_context=data_context,
                event=usage_event,
                success=False,
            )
        sys.exit(1)
    return get_batch_request(
        datasource=datasource,
        additional_batch_request_args=additional_batch_request_args,
    )
def _datasource_new_flow(
    context: DataContext,
    usage_event_end: str,
    datasource_name: Optional[str] = None,
    jupyter: bool = True,
) -> None:
    """Interactive flow for creating a new Datasource.

    Prompts for a files-based or SQL backend, renders the setup notebook, and
    either launches Jupyter or prints instructions for doing so manually.
    """
    files_or_sql_selection = click.prompt(
        """
What data would you like Great Expectations to connect to?
    1. Files on a filesystem (for processing with Pandas or Spark)
    2. Relational database (SQL)
""",
        type=click.Choice(["1", "2"]),
        show_choices=False,
    )
    if files_or_sql_selection == "1":
        selected_files_backend = _prompt_for_execution_engine()
        helper = _get_files_helper(
            selected_files_backend,
            context_root_dir=context.root_directory,
            datasource_name=datasource_name,
        )
    elif files_or_sql_selection == "2":
        if not _verify_sqlalchemy_dependent_modules():
            return None
        selected_database = _prompt_user_for_database_backend()
        helper = _get_sql_yaml_helper_class(selected_database, datasource_name)
    else:
        helper = None
    # BUGFIX: the original fell through to helper.send_backend_choice_usage_message
    # with helper=None, which would raise AttributeError. click.Choice makes the
    # branch unreachable today, but guard so a future choice cannot crash.
    if helper is None:
        return None
    helper.send_backend_choice_usage_message(context)
    if not helper.verify_libraries_installed():
        return None
    helper.prompt()
    notebook_path = helper.create_notebook(context)
    if jupyter is False:
        cli_message(
            f"To continue editing this Datasource, run <green>jupyter notebook {notebook_path}</green>"
        )
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
        return None
    if notebook_path:
        cli_message(
            """<green>Because you requested to create a new Datasource, we'll open a notebook for you now to complete it!</green>\n\n"""
        )
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=True,
        )
        toolkit.launch_jupyter_notebook(notebook_path)
def docs_build(directory, site_name, view=True, assume_yes=False) -> None:
    """Build Data Docs for a project."""
    # Build the requested site(s), then record the v2 usage event.
    loaded_context = toolkit.load_data_context_with_error_handling(directory)
    build_docs(loaded_context, site_name=site_name, view=view, assume_yes=assume_yes)
    send_usage_message(
        data_context=loaded_context,
        event="cli.docs.build",
        api_version="v2",
        success=True,
    )
def exit_with_failure_message_and_stats(
    context: DataContext, usage_event: str, message: str
) -> None:
    """Print a failure message, emit a failed v2 usage event, and exit(1)."""
    cli_message(message)
    send_usage_message(
        data_context=context,
        event=usage_event,
        api_version="v2",
        success=False,
    )
    sys.exit(1)
def send_backend_choice_usage_message(self, context: DataContext) -> None:
    """Record which datasource backend the user selected as a usage event."""
    payload = {
        "type": self.datasource_type.value,
        **self.usage_stats_payload,
    }
    send_usage_message(
        data_context=context,
        event=UsageStatsEvents.CLI_NEW_DS_CHOICE.value,
        event_payload=payload,
        success=True,
    )
def datasource(ctx):
    """Datasource operations"""
    ctx.obj.data_context = ctx.obj.get_data_context_from_config_file()
    # Derive begin/end event names from the invoked subcommand; the end event
    # is stashed on the context for the subcommand to emit later.
    event_prefix = f"cli.datasource.{ctx.invoked_subcommand}"
    send_usage_message(
        data_context=ctx.obj.data_context,
        event=f"{event_prefix}.begin",
        success=True,
    )
    ctx.obj.usage_event_end = f"{event_prefix}.end"
def send_backend_choice_usage_message(self, context: DataContext) -> None:
    """Record which datasource backend the user selected as a usage event."""
    payload = {
        "type": self.datasource_type.value,
        **self.usage_stats_payload,
    }
    send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload=payload,
        success=True,
    )
def suite_demo(ctx: click.Context) -> None:
    """This command is not supported in the v3 (Batch Request) API."""
    # Deprecated stub: record the event, then tell the user to use `suite new`.
    context: DataContext = ctx.obj.data_context
    send_usage_message(
        data_context=context,
        event=ctx.obj.usage_event_end,
        success=True,
    )
    cli_message(
        string="This command is not supported in the v3 (Batch Request) API. Please use `suite new` instead."
    )
def _exit_early_if_error(
    error_message: Optional[str],
    context: DataContext,
    usage_event_end: str,
    interactive_mode: CLISuiteInteractiveFlagCombinations,
) -> None:
    """If an error message is present, report it, record a failed usage event
    (tagged with the interactive-mode flags), and exit(1)."""
    if error_message is None:
        return
    cli_message(string=f"<red>{error_message}</red>")
    send_usage_message(
        data_context=context,
        event=usage_event_end,
        event_payload=interactive_mode.value,
        success=False,
    )
    sys.exit(1)
def exit_with_failure_message_and_stats(
    data_context: DataContext,
    usage_event: str,
    suppress_usage_message: bool = False,
    message: Optional[str] = None,
) -> None:
    """Optionally print a message and record a failed usage event, then exit(1)."""
    if message:
        cli_message(string=message)
    if not suppress_usage_message:
        send_usage_message(
            data_context=data_context,
            event=usage_event,
            success=False,
        )
    sys.exit(1)
def suite_delete(ctx: click.Context, suite: str) -> None:
    """
    Delete an Expectation Suite from the Expectation Store.
    """
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    try:
        suite_names: List[str] = context.list_expectation_suite_names()
    except Exception as e:
        # Listing failed: record the failure, then let the error surface.
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=False,
        )
        raise e
    if not suite_names:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            suppress_usage_message=False,
            message="<red>No expectation suites found in the project.</red>",
        )
    if suite not in suite_names:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            suppress_usage_message=False,
            message=f"<red>No expectation suite named {suite} found.</red>",
        )
    # Confirm unless --assume-yes was passed; a "no" exits without deleting.
    confirmed = ctx.obj.assume_yes or toolkit.confirm_proceed_or_exit(
        exit_on_no=False, data_context=context, usage_stats_event=usage_event_end
    )
    if not confirmed:
        cli_message(string=f"Suite `{suite}` was not deleted.")
        sys.exit(0)
    context.delete_expectation_suite(suite)
    cli_message(string=f"Deleted the expectation suite named: {suite}")
    send_usage_message(
        data_context=context,
        event=usage_event_end,
        success=True,
    )
def project_check_config(directory):
    """Check a config for validity and help with migrations."""
    cli_message("Checking your config files for validity...\n")
    is_config_ok, error_message, context = do_config_check(directory)
    # A usable context may exist even for an invalid config; record the event
    # whenever one was produced.
    if context:
        send_usage_message(
            data_context=context,
            event="cli.project.check_config",
            api_version="v2",
            success=True,
        )
    if not is_config_ok:
        cli_message("Unfortunately, your config appears to be invalid:\n")
        cli_message(f"<red>{error_message}</red>")
        sys.exit(1)
    cli_message("<green>Your config file appears valid!</green>")
def confirm_proceed_or_exit(
    confirm_prompt: str = "Would you like to proceed?",
    continuation_message: str = "Ok, exiting now. You can always read more at https://docs.greatexpectations.io/ !",
    exit_on_no: bool = True,
    exit_code: int = 0,
    data_context: Optional[DataContext] = None,
    usage_stats_event: Optional[str] = None,
) -> bool:
    """
    Every CLI command that starts a potentially lengthy (>1 sec) computation
    or modifies some resources (e.g., edits the config file, adds objects to
    the stores) must follow this pattern:
    1. Explain which resources will be created/modified/deleted
    2. Use this method to ask for user's confirmation

    The goal of this standardization is for the users to expect consistency -
    if you saw one command, you know what to expect from all others.

    If the user does not confirm, the program should exit. The purpose of the exit_on_no parameter is to provide
    the option to perform cleanup actions before exiting outside of the function.
    """
    confirm_prompt_colorized = cli_colorize_string(confirm_prompt)
    continuation_message_colorized = cli_colorize_string(continuation_message)
    if not click.confirm(confirm_prompt_colorized, default=True):
        # BUGFIX: previously the continuation message was printed twice when
        # exit_on_no was True, and sys.exit() was reached even when exit_on_no
        # was False (breaking the documented "caller cleans up" path).
        if exit_on_no:
            cli_message(string=continuation_message_colorized)
            if (usage_stats_event is not None) and (data_context is not None):
                # noinspection PyBroadException
                try:
                    send_usage_message(
                        data_context=data_context,
                        event=usage_stats_event,
                        event_payload={"cancelled": True},
                        success=True,
                    )
                except Exception as e:
                    # Don't fail on usage stats
                    logger.debug(
                        f"Something went wrong when sending usage stats: {e}")
            sys.exit(exit_code)
        else:
            return False
    return True
def datasource(ctx: click.Context) -> None:
    """Datasource operations"""
    ctx.obj.data_context = ctx.obj.get_data_context_from_config_file()
    # Resolve the paired begin/end usage-stats event names for the invoked
    # subcommand; the end event is stashed on the context for the subcommand.
    begin_event, end_event = UsageStatsEvents.get_cli_begin_and_end_event_names(
        noun="datasource",
        verb=ctx.invoked_subcommand,
    )
    send_usage_message(
        data_context=ctx.obj.data_context,
        event=begin_event,
        success=True,
    )
    ctx.obj.usage_event_end = end_event
def checkpoint_new(checkpoint, suite, directory, datasource) -> None:
    """Create a new checkpoint for easy deployments. (Experimental)"""
    usage_event = "cli.checkpoint.new"
    context = toolkit.load_data_context_with_error_handling(directory)
    _verify_checkpoint_does_not_exist(context, checkpoint, usage_event)
    loaded_suite: ExpectationSuite = toolkit.load_expectation_suite(
        context, suite, usage_event
    )
    selected_datasource = toolkit.select_datasource(
        context, datasource_name=datasource
    )
    if selected_datasource is None:
        send_usage_message(
            data_context=context,
            event=usage_event,
            api_version="v2",
            success=False,
        )
        sys.exit(1)
    _, _, _, batch_kwargs = toolkit.get_batch_kwargs(context, selected_datasource.name)
    # Register a LegacyCheckpoint binding this batch to the chosen suite.
    context.add_checkpoint(
        name=checkpoint,
        **{
            "class_name": "LegacyCheckpoint",
            "batches": [
                {
                    "batch_kwargs": dict(batch_kwargs),
                    "expectation_suite_names": [loaded_suite.expectation_suite_name],
                }
            ],
        },
    )
    cli_message(
        f"""<green>A Checkpoint named `{checkpoint}` was added to your project!</green>
  - To run this Checkpoint, run `great_expectations --v2-api checkpoint run {checkpoint}`"""
    )
    send_usage_message(
        data_context=context,
        event=usage_event,
        api_version="v2",
        success=True,
    )
def checkpoint_script(ctx: click.Context, checkpoint: str) -> None:
    """
    Create a python script to run a Checkpoint.

    Checkpoints can be run directly without this script using the
    `great_expectations Checkpoint run` command.

    This script is provided for those who wish to run Checkpoints via python.
    """
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    # Fail early (with stats) if the named Checkpoint is invalid or missing.
    toolkit.validate_checkpoint(
        context=context, checkpoint_name=checkpoint, usage_event=usage_event_end
    )
    script_name: str = f"run_{checkpoint}.py"
    script_path: str = os.path.join(
        context.root_directory, context.GE_UNCOMMITTED_DIR, script_name
    )
    # Never clobber an existing script.
    if os.path.isfile(script_path):
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message=f"""<red>Warning! A script named {script_name} already exists and this command will not overwrite it.</red>
  - Existing file path: {script_path}""",
        )
    _write_checkpoint_script_to_disk(
        context_directory=context.root_directory,
        checkpoint_name=checkpoint,
        script_path=script_path,
    )
    cli_message(
        f"""<green>A python script was created that runs the Checkpoint named: `{checkpoint}`</green>
  - The script is located in `great_expectations/uncommitted/run_{checkpoint}.py`
  - The script can be run with `python great_expectations/uncommitted/run_{checkpoint}.py`"""
    )
    send_usage_message(
        data_context=context,
        event=usage_event_end,
        success=True,
    )
def _checkpoint_new(ctx, checkpoint_name, jupyter):
    """Render the 'edit checkpoint' notebook and optionally launch Jupyter."""
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    try:
        _verify_checkpoint_does_not_exist(context, checkpoint_name, usage_event_end)
        # Create the notebook on disk.
        notebook_file_path = _get_notebook_path(
            context, f"edit_checkpoint_{checkpoint_name}.ipynb"
        )
        renderer = CheckpointNewNotebookRenderer(
            context=context, checkpoint_name=checkpoint_name
        )
        renderer.render_to_disk(notebook_file_path=notebook_file_path)
        if jupyter:
            cli_message(
                """<green>Because you requested to create a new Checkpoint, we'll open a notebook for you now to edit it! If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"""
            )
            toolkit.launch_jupyter_notebook(notebook_file_path)
        else:
            # Usage event is only sent on the non-Jupyter path, matching the
            # original control flow.
            cli_message(
                f"To continue editing this Checkpoint, run <green>jupyter notebook {notebook_file_path}</green>"
            )
            send_usage_message(
                data_context=context,
                event=usage_event_end,
                success=True,
            )
    except Exception as e:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            message=f"<red>{e}</red>",
        )
        return