def build_docs(
    context: DataContext,
    usage_stats_event: str,
    site_names: Optional[List[str]] = None,
    view: Optional[bool] = True,
    assume_yes: Optional[bool] = False,
):
    """Build documentation in a context"""
    logger.debug("Starting cli.datasource.build_docs")

    # Dry run first: discover which sites would be built so the user can review
    # the list before committing to the (potentially slow) real build.
    index_page_locator_infos: Dict[str, str] = context.build_data_docs(
        site_names=site_names, dry_run=True
    )

    msg: str = "\nThe following Data Docs sites will be built:\n\n"
    for name, locator in index_page_locator_infos.items():
        msg += f" - <cyan>{name}:</cyan> "
        msg += f"{locator}\n"
    cli_message(msg)

    # Skip the interactive confirmation when the caller pre-approved the build.
    if not assume_yes:
        toolkit.confirm_proceed_or_exit(
            data_context=context, usage_stats_event=usage_stats_event
        )

    cli_message("\nBuilding Data Docs...\n")
    context.build_data_docs(site_names=site_names)
    cli_message("Done building Data Docs")

    # Only open sites in a browser when the caller both asked for a view and
    # named specific sites.
    if view and site_names:
        for site_to_open in site_names:
            context.open_data_docs(site_name=site_to_open, only_if_exists=True)
def build_docs(context, site_name=None, view=True, assume_yes=False):
    """Build documentation in a context.

    Args:
        context: the DataContext whose Data Docs sites are built.
        site_name: optional single site to build; None builds all sites.
        view: when True, open the built site in a browser afterwards.
        assume_yes: when True, skip the interactive confirmation prompt.
    """
    logger.debug("Starting cli.datasource.build_docs")
    if site_name is not None:
        site_names = [site_name]
    else:
        site_names = None
    # Dry run to list the sites that would be built before doing the real work.
    index_page_locator_infos = context.build_data_docs(
        site_names=site_names, dry_run=True
    )
    msg = "\nThe following Data Docs sites will be built:\n\n"
    # BUG FIX: use a distinct loop variable — the original iterated with
    # `for site_name, ...`, clobbering the `site_name` parameter, so the
    # open_data_docs call below received the last-listed site instead of the
    # caller's argument (notably wrong when site_name=None and several sites
    # exist).
    for listed_site_name, index_page_locator_info in index_page_locator_infos.items():
        msg += " - <cyan>{}:</cyan> ".format(listed_site_name)
        msg += "{}\n".format(index_page_locator_info)
    cli_message(msg)
    if not assume_yes:
        toolkit.confirm_proceed_or_exit()
    cli_message("\nBuilding Data Docs...\n")
    # The return value was previously captured into an unused variable; drop it.
    context.build_data_docs(site_names=site_names)
    cli_message("Done building Data Docs")
    if view:
        context.open_data_docs(site_name=site_name, only_if_exists=True)
def delete_datasource(ctx, datasource):
    """Delete the datasource specified as an argument"""
    # ctx.obj carries CLI state; the code reads its data_context, usage_event_end
    # and assume_yes attributes below.
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end
    if not ctx.obj.assume_yes:
        # NOTE(review): the confirm prompt ends with a stray '"' after the
        # question mark — looks like a typo in the message; confirm intent.
        toolkit.confirm_proceed_or_exit(
            confirm_prompt=f"""\nAre you sure you want to delete the Datasource "{datasource}" (this action is irreversible)?" """,
            continuation_message=f"Datasource `{datasource}` was not deleted.",
            exit_on_no=True,
            data_context=context,
            usage_stats_event=usage_event_end,
        )
    try:
        context.delete_datasource(datasource)
    except ValueError:
        # delete_datasource raised ValueError: the named datasource does not exist.
        cli_message(f"<red>Datasource {datasource} could not be found.</red>")
        toolkit.send_usage_message(context, event=usage_event_end, success=False)
        sys.exit(1)
    try:
        # Verify deletion: success is signalled by get_datasource now raising.
        context.get_datasource(datasource)
    except ValueError:
        cli_message("<green>{}</green>".format("Datasource deleted successfully."))
        toolkit.send_usage_message(context, event=usage_event_end, success=True)
        sys.exit(0)
    # NOTE(review): if get_datasource still succeeds (deletion silently failed),
    # the function falls through and returns None without any message, exit
    # code, or usage event — presumably intentional best-effort; verify.
def build_docs(context, site_name=None, view=True):
    """Build documentation in a context, reporting each site's accessibility.

    For every site that would be built, checks whether its index page is
    reachable (on disk for file:// locators, via HTTP GET otherwise) and lists
    either the locator or a permissions hint before prompting to proceed.

    Args:
        context: the DataContext whose Data Docs sites are built.
        site_name: optional single site to build; None builds all sites.
        view: when True, open the built site in a browser afterwards.
    """
    logger.debug("Starting cli.datasource.build_docs")
    if site_name is not None:
        site_names = [site_name]
    else:
        site_names = None
    index_page_locator_infos = context.build_data_docs(
        site_names=site_names, dry_run=True
    )
    msg = "\nThe following Data Docs sites will be built:\n\n"
    # BUG FIX: iterate with a distinct variable so the `site_name` parameter is
    # not clobbered by the loop (the original rebound it, so open_data_docs
    # below received the last-listed site instead of the caller's argument).
    for listed_site_name, index_page_locator_info in index_page_locator_infos.items():
        if index_page_locator_info.startswith("file"):
            # Strip the "file://" scheme prefix and test the path on disk.
            accessible = os.path.isfile(index_page_locator_info[6:])
        else:
            r = requests.get(index_page_locator_info, stream=True)
            # BUG FIX: the original compared the Response object itself to 200
            # (`r == 200`), which is always False; compare the status code.
            accessible = r.status_code == 200
        msg += " - <cyan>{}:</cyan> ".format(listed_site_name)
        if accessible:
            msg += "{}\n".format(index_page_locator_info)
        else:
            msg += "{}\n".format(
                "Site doesn’t exist or is inaccessible at "
                + index_page_locator_info
                + ". If you"
                + " just built data docs,"
                + " please check permissions."
            )
    cli_message(msg)
    toolkit.confirm_proceed_or_exit()
    cli_message("\nBuilding Data Docs...\n")
    context.build_data_docs(site_names=site_names)
    cli_message("Done building Data Docs")
    if view:
        context.open_data_docs(site_name=site_name, only_if_exists=True)
def library_install_load_check(
    python_import_name: str, pip_library_name: str
) -> Union[int, None]:
    """
    Dynamically load a module from strings; if absent, offer to pip-install it.

    Args:
        python_import_name (str): a module to import to verify installation
        pip_library_name: name of the pip package that provides the module

    Returns:
        None if the library is already loadable; otherwise 0 when the install
        succeeded and the library became loadable, 1 when the library is still
        not loadable, or the raw pip status code otherwise.
        (The original docstring claimed a True/False return, which was wrong.)
    """
    if is_library_loadable(library_name=python_import_name):
        return None

    confirm_prompt: str = f"""Great Expectations relies on the library `{python_import_name}` to connect to your data, \
but the package `{pip_library_name}` containing this library is not installed.
    Would you like Great Expectations to try to execute `pip install {pip_library_name}` for you?"""
    continuation_message: str = f"""\nOK, exiting now.
    - Please execute `pip install {pip_library_name}` before trying again."""
    pip_install_confirmed = toolkit.confirm_proceed_or_exit(
        confirm_prompt=confirm_prompt,
        continuation_message=continuation_message,
        exit_on_no=True,
        exit_code=1,
    )

    if not pip_install_confirmed:
        cli_message(continuation_message)
        sys.exit(1)

    status_code: int = execute_shell_command_with_progress_polling(
        f"pip install {pip_library_name}"
    )

    # Refresh pkg_resources' view of the environment so the freshly installed
    # distribution can be discovered without restarting the process.
    working_set: WorkingSet = pkg_resources.working_set
    # noinspection SpellCheckingInspection
    distr: Distribution = pkg_resources.get_distribution(dist=pip_library_name)
    # Call add_entry on the instance directly; the original invoked the unbound
    # method as `pkg_resources.WorkingSet.add_entry(self=working_set, entry=...)`.
    working_set.add_entry(distr.key)

    library_loadable: bool = is_library_loadable(library_name=python_import_name)

    if status_code == 0 and library_loadable:
        return 0

    if not library_loadable:
        cli_message(
            f"""<red>ERROR: Great Expectations relies on the library `{pip_library_name}` to connect to your
data.</red> - Please execute `pip install {pip_library_name}` before trying again."""
        )
        return 1

    return status_code
def suite_delete(ctx: click.Context, suite: str) -> None:
    """
    Delete an Expectation Suite from the Expectation Store.
    """
    context: DataContext = ctx.obj.data_context
    usage_event_end: str = ctx.obj.usage_event_end

    # Listing the suites is the first thing that can fail; record a failed
    # usage event before re-raising so the stats reflect the attempt.
    try:
        suite_names: List[str] = context.list_expectation_suite_names()
    except Exception as e:
        send_usage_message(
            data_context=context,
            event=usage_event_end,
            success=False,
        )
        raise e

    # Guard clauses: bail out (with stats) when there is nothing to delete or
    # the named suite is absent.
    if not suite_names:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            suppress_usage_message=False,
            message="<red>No expectation suites found in the project.</red>",
        )
    if suite not in suite_names:
        toolkit.exit_with_failure_message_and_stats(
            data_context=context,
            usage_event=usage_event_end,
            suppress_usage_message=False,
            message=f"<red>No expectation suite named {suite} found.</red>",
        )

    # Either the caller pre-approved (--assume-yes) or the user confirms here.
    confirmed = ctx.obj.assume_yes or toolkit.confirm_proceed_or_exit(
        exit_on_no=False, data_context=context, usage_stats_event=usage_event_end
    )
    if not confirmed:
        cli_message(string=f"Suite `{suite}` was not deleted.")
        sys.exit(0)

    context.delete_expectation_suite(suite)
    cli_message(string=f"Deleted the expectation suite named: {suite}")
    send_usage_message(
        data_context=context,
        event=usage_event_end,
        success=True,
    )
def _add_spark_datasource(
    context, passthrough_generator_only=True, prompt_for_datasource_name=True
):
    """Interactively configure and add a SparkDFDatasource to the context.

    Args:
        context: the DataContext to add the datasource to.
        passthrough_generator_only: when True, create a bare "files_spark_datasource"
            with no batch-kwargs generator; otherwise prompt for a base directory
            and configure a SubdirReaderBatchKwargsGenerator.
        prompt_for_datasource_name: when True (and not passthrough-only), let the
            user override the derived datasource name.

    Returns:
        The name of the datasource that was added, or None if the pyspark
        dependency check failed.

    Raises:
        GreatExpectationsError: if the generated configuration fails schema
            validation.
    """
    toolkit.send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload={"type": "spark"},
        success=True,
    )

    if not _verify_pyspark_dependent_modules():
        return None

    if passthrough_generator_only:
        datasource_name = "files_spark_datasource"
        configuration = SparkDFDatasource.build_configuration()
    else:
        path = click.prompt(
            msg_prompt_filesys_enter_base_path,
            type=click.Path(exists=True, file_okay=False),
        ).strip()
        if path.startswith("./"):
            path = path[2:]
        # Derive the default datasource name from the directory's basename.
        if path.endswith("/"):
            basenamepath = path[:-1]
        else:
            basenamepath = path
        datasource_name = os.path.basename(basenamepath) + "__dir"
        if prompt_for_datasource_name:
            datasource_name = click.prompt(
                msg_prompt_datasource_name, default=datasource_name
            )
        configuration = SparkDFDatasource.build_configuration(
            batch_kwargs_generators={
                "subdir_reader": {
                    "class_name": "SubdirReaderBatchKwargsGenerator",
                    # Relative to the great_expectations directory, hence "..".
                    "base_directory": os.path.join("..", path),
                }
            }
        )
    configuration["class_name"] = "SparkDFDatasource"
    configuration["module_name"] = "great_expectations.datasource"
    errors = DatasourceConfigSchema().validate(configuration)
    if len(errors) != 0:
        # BUG FIX: `errors` is a dict, and the "{:s}" format spec raises
        # TypeError on non-str values, masking the intended error; use "{}".
        raise ge_exceptions.GreatExpectationsError(
            "Invalid Datasource configuration: {}".format(errors)
        )

    cli_message(
        """
Great Expectations will now add a new Datasource '{:s}' to your deployment, by adding this entry to your great_expectations.yml:

{:s}
""".format(
            datasource_name,
            textwrap.indent(toolkit.yaml.dump({datasource_name: configuration}), " "),
        )
    )
    toolkit.confirm_proceed_or_exit()

    context.add_datasource(name=datasource_name, **configuration)
    return datasource_name
def _add_sqlalchemy_datasource(context, prompt_for_datasource_name=True):
    """Interactively configure and add a SqlAlchemyDatasource to the context.

    Prompts for the database flavor, collects credentials (retrying on
    connection failure), stores the credentials via config-variable
    substitution, validates the configuration, and adds the datasource.

    Args:
        context: the DataContext to add the datasource to.
        prompt_for_datasource_name: when True, let the user override the
            default datasource name derived from the database flavor.

    Returns:
        The name of the datasource that was added, or None when a dependency
        check fails or the user gives up after a failed connection.

    Raises:
        GreatExpectationsError: if the generated configuration fails schema
            validation.
    """
    msg_success_database = (
        "\n<green>Great Expectations connected to your database!</green>"
    )

    if not _verify_sqlalchemy_dependent_modules():
        return None

    db_choices = [str(x) for x in list(range(1, 1 + len(SupportedDatabases)))]
    selected_database = (
        int(
            click.prompt(
                msg_prompt_choose_database,
                type=click.Choice(db_choices),
                show_choices=False,
            )
        )
        - 1
    )  # don't show user a zero index list :)
    selected_database = list(SupportedDatabases)[selected_database]

    toolkit.send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload={"type": "sqlalchemy", "db": selected_database.name},
        success=True,
    )

    datasource_name = "my_{}_db".format(selected_database.value.lower())
    if selected_database == SupportedDatabases.OTHER:
        datasource_name = "my_database"
    if prompt_for_datasource_name:
        datasource_name = click.prompt(
            msg_prompt_datasource_name, default=datasource_name
        )

    credentials = {}
    # Since we don't want to save the database credentials in the config file that will be
    # committed in the repo, we will use our Variable Substitution feature to store the credentials
    # in the credentials file (that will not be committed, since it is in the uncommitted directory)
    # with the datasource's name as the variable name.
    # The value of the datasource's "credentials" key in the config file (great_expectations.yml) will
    # be ${datasource name}.
    # Great Expectations will replace the ${datasource name} with the value from the credentials file in runtime.
    while True:
        cli_message(msg_db_config.format(datasource_name))

        # Collect flavor-specific credentials; each branch verifies its driver
        # modules first and bails out (None) when they are missing.
        if selected_database == SupportedDatabases.MYSQL:
            if not _verify_mysql_dependent_modules():
                return None
            credentials = _collect_mysql_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.POSTGRES:
            if not _verify_postgresql_dependent_modules():
                return None
            credentials = _collect_postgres_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.REDSHIFT:
            if not _verify_redshift_dependent_modules():
                return None
            credentials = _collect_redshift_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.SNOWFLAKE:
            if not _verify_snowflake_dependent_modules():
                return None
            credentials = _collect_snowflake_credentials(
                default_credentials=credentials
            )
        elif selected_database == SupportedDatabases.BIGQUERY:
            if not _verify_bigquery_dependent_modules():
                return None
            credentials = _collect_bigquery_credentials(default_credentials=credentials)
        elif selected_database == SupportedDatabases.OTHER:
            sqlalchemy_url = click.prompt(
                """What is the url/connection string for the sqlalchemy connection?
(reference: https://docs.sqlalchemy.org/en/latest/core/engines.html#database-urls)
""",
                show_default=False,
            ).strip()
            credentials = {"url": sqlalchemy_url}

        context.save_config_variable(datasource_name, credentials)

        message = """
<red>Cannot connect to the database.</red>
  - Please check your environment and the configuration you provided.
  - Database Error: {0:s}"""
        try:
            cli_message(
                "<cyan>Attempting to connect to your database. This may take a moment...</cyan>"
            )

            configuration = SqlAlchemyDatasource.build_configuration(
                credentials="${" + datasource_name + "}"
            )

            configuration["class_name"] = "SqlAlchemyDatasource"
            configuration["module_name"] = "great_expectations.datasource"
            errors = DatasourceConfigSchema().validate(configuration)
            if len(errors) != 0:
                # BUG FIX: `errors` is a dict, and the "{:s}" format spec
                # raises TypeError on non-str values; use "{}" instead.
                raise ge_exceptions.GreatExpectationsError(
                    "Invalid Datasource configuration: {}".format(errors)
                )

            cli_message(
                """
Great Expectations will now add a new Datasource '{0:s}' to your deployment, by adding this entry to your great_expectations.yml:

{1:s}
The credentials will be saved in uncommitted/config_variables.yml under the key '{0:s}'
""".format(
                    datasource_name,
                    textwrap.indent(
                        toolkit.yaml.dump({datasource_name: configuration}), " "
                    ),
                )
            )

            toolkit.confirm_proceed_or_exit()
            context.add_datasource(name=datasource_name, **configuration)
            cli_message(msg_success_database)
            break
        except ModuleNotFoundError as de:
            cli_message(message.format(str(de)))
            return None
        except DatasourceInitializationError as de:
            cli_message(message.format(str(de)))
            # Offer to re-enter credentials; if declined, persist the
            # (unverified) datasource config so the user can fix it by hand.
            if not click.confirm("Enter the credentials again?", default=True):
                context.add_datasource(
                    datasource_name,
                    initialize=False,
                    module_name="great_expectations.datasource",
                    class_name="SqlAlchemyDatasource",
                    data_asset_type={"class_name": "SqlAlchemyDataset"},
                    credentials="${" + datasource_name + "}",
                )
                # TODO this message about continuing may not be accurate
                cli_message(
                    """
We saved datasource {:s} in {:s} and the credentials you entered in {:s}.
Since we could not connect to the database, you can complete troubleshooting in the configuration files documented in the how-to guides here:
<blue>https://docs.greatexpectations.io/en/latest/guides/how_to_guides/configuring_datasources.html?utm_source=cli&utm_medium=init&utm_campaign={:s}#{:s}</blue> .

After you connect to the datasource, run great_expectations init to continue.

""".format(
                        datasource_name,
                        DataContext.GE_YML,
                        context.get_config()["config_variables_file_path"],
                        rtd_url_ge_version,
                        selected_database.value.lower(),
                    )
                )
                return None

    return datasource_name
def _add_pandas_datasource(
    context, passthrough_generator_only=True, prompt_for_datasource_name=True
):
    """Interactively configure and add a PandasDatasource to the context.

    Args:
        context: the DataContext to add the datasource to.
        passthrough_generator_only: when True, create a bare "files_datasource"
            with no batch-kwargs generator; otherwise prompt for a base directory
            and configure a SubdirReaderBatchKwargsGenerator.
        prompt_for_datasource_name: when True (and not passthrough-only), let the
            user override the derived datasource name.

    Returns:
        The name of the datasource that was added.

    Raises:
        GreatExpectationsError: if the generated configuration fails schema
            validation.
    """
    toolkit.send_usage_message(
        data_context=context,
        event="cli.new_ds_choice",
        event_payload={"type": "pandas"},
        success=True,
    )
    if passthrough_generator_only:
        datasource_name = "files_datasource"
        configuration = PandasDatasource.build_configuration()
    else:
        path = click.prompt(
            msg_prompt_filesys_enter_base_path,
            type=click.Path(exists=True, file_okay=False),
        )
        if path.startswith("./"):
            path = path[2:]
        # Derive the default datasource name from the directory's basename.
        if path.endswith("/"):
            basenamepath = path[:-1]
        else:
            basenamepath = path
        datasource_name = os.path.basename(basenamepath) + "__dir"
        if prompt_for_datasource_name:
            datasource_name = click.prompt(
                msg_prompt_datasource_name, default=datasource_name
            )
        configuration = PandasDatasource.build_configuration(
            batch_kwargs_generators={
                "subdir_reader": {
                    "class_name": "SubdirReaderBatchKwargsGenerator",
                    # Relative to the great_expectations directory, hence "..".
                    "base_directory": os.path.join("..", path),
                }
            }
        )
    configuration["class_name"] = "PandasDatasource"
    configuration["module_name"] = "great_expectations.datasource"
    errors = DatasourceConfigSchema().validate(configuration)
    if len(errors) != 0:
        # BUG FIX: `errors` is a dict, and the "{:s}" format spec raises
        # TypeError on non-str values, masking the intended error; use "{}".
        raise ge_exceptions.GreatExpectationsError(
            "Invalid Datasource configuration: {}".format(errors)
        )

    cli_message(
        """
Great Expectations will now add a new Datasource '{:s}' to your deployment, by adding this entry to your great_expectations.yml:

{:s}
""".format(
            datasource_name,
            textwrap.indent(toolkit.yaml.dump({datasource_name: configuration}), " "),
        )
    )
    toolkit.confirm_proceed_or_exit(
        continuation_message="Okay, exiting now. To learn more about adding datasources, run great_expectations "
        "datasource --help or visit https://docs.greatexpectations.io/"
    )

    context.add_datasource(name=datasource_name, **configuration)
    return datasource_name