def test_parse_cli_config_file_location_posix_paths_existing_files_with_no_extension(
    tmp_path_factory,
):
    """Extension-less posix paths parse correctly once the files exist."""
    fixtures = [
        {
            "input_path": "relative/path/to/file/no_extension",
            "expected": {
                "directory": "relative/path/to/file",
                "filename": "no_extension",
            },
        },
        {
            "input_path": "/absolute/path/to/file/no_extension",
            "expected": {
                "directory": "/absolute/path/to/file",
                "filename": "no_extension",
            },
        },
        {
            "input_path": "no_extension",
            "expected": {"directory": None, "filename": "no_extension"},
        },
    ]

    # Nothing exists yet, so every parse attempt must fail.
    for fixture in fixtures:
        with pytest.raises(ge_exceptions.ConfigNotFoundError):
            toolkit.parse_cli_config_file_location(fixture["input_path"])

    # Create the files under a temporary root and re-run the assertions.
    root_dir = str(tmp_path_factory.mktemp("posix"))
    for fixture in fixtures:
        expected = fixture["expected"]
        expected_dir = expected.get("directory")
        # Re-root absolute paths under the tmp directory.
        if expected_dir is not None and expected_dir.startswith("/"):
            expected_dir = expected_dir[1:]
        expected_filename = expected.get("filename")
        if expected_dir:
            test_directory = os.path.join(root_dir, expected_dir)
            os.makedirs(test_directory, exist_ok=True)
            if expected_filename:
                expected_filepath = os.path.join(test_directory, expected_filename)
                open(expected_filepath, "w").close()  # touch the file
                output = toolkit.parse_cli_config_file_location(expected_filepath)
                assert output == {
                    "directory": os.path.join(root_dir, expected_dir),
                    "filename": expected_filename,
                }
def checkpoint(ctx):
    """
    Checkpoint operations

    A Checkpoint is a bundle of one or more batches of data with one or more Expectation Suites.

    A Checkpoint can be as simple as one batch of data paired with one Expectation Suite.

    A Checkpoint can be as complex as many batches of data across different datasources paired with one or more Expectation Suites each.
    """
    # Resolve the config directory from the CLI-provided config file location.
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    context: DataContext = toolkit.load_data_context_with_error_handling(
        directory=config_location.get("directory"),
        from_cli_upgrade_command=False,
    )
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = context

    # Emit a usage-stats "begin" event and stash the matching "end" event name.
    usage_stats_prefix = f"cli.checkpoint.{ctx.invoked_subcommand}"
    toolkit.send_usage_message(
        data_context=context,
        event=f"{usage_stats_prefix}.begin",
        success=True,
    )
    ctx.obj.usage_event_end = f"{usage_stats_prefix}.end"
def suite_list(ctx):
    """Lists available Expectation Suites."""
    display_not_implemented_message_and_exit()
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    context = toolkit.load_data_context_with_error_handling(directory)
    try:
        suite_names = [
            f" - <cyan>{suite_name}</cyan>"
            for suite_name in context.list_expectation_suite_names()
        ]
        if not suite_names:
            cli_message("No Expectation Suites found")
            toolkit.send_usage_message(
                data_context=context, event="cli.suite.list", success=True
            )
            return
        if len(suite_names) == 1:
            list_intro_string = "1 Expectation Suite found:"
        else:
            list_intro_string = f"{len(suite_names)} Expectation Suites found:"
        cli_message_list(suite_names, list_intro_string)
        toolkit.send_usage_message(
            data_context=context, event="cli.suite.list", success=True
        )
    except Exception:
        # Record the failure in usage stats, then re-raise with a bare `raise`
        # so the original traceback is preserved unchanged (was `raise e`).
        toolkit.send_usage_message(
            data_context=context, event="cli.suite.list", success=False
        )
        raise
def suite_delete(ctx, suite):
    """
    Delete an expectation suite from the expectation store.
    """
    display_not_implemented_message_and_exit()
    usage_event = "cli.suite.delete"
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    context = toolkit.load_data_context_with_error_handling(directory)
    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context,
            usage_event,
            # Fixed markup: the opening tag was "</red>", which is a closing
            # tag and would not render the message in red.
            "<red>No expectation suites found in the project.</red>",
        )
    if suite not in suite_names:
        toolkit.exit_with_failure_message_and_stats(
            context, usage_event, f"No expectation suite named {suite} found."
        )
    context.delete_expectation_suite(suite)
    cli_message(f"Deleted the expectation suite named: {suite}")
    toolkit.send_usage_message(data_context=context, event=usage_event, success=True)
def test_parse_cli_config_file_location_empty_paths():
    """None and empty-string config locations parse to empty directory/filename."""
    for empty_input in (None, ""):
        assert toolkit.parse_cli_config_file_location(empty_input) == {
            "directory": None,
            "filename": None,
        }
def suite_edit(ctx, suite, datasource, jupyter, batch_kwargs):
    """
    Generate a Jupyter notebook for editing an existing Expectation Suite.

    The SUITE argument is required. This is the name you gave to the suite when you created it.

    A batch of data is required to edit the suite, which is used as a sample.

    The edit command will help you specify a batch interactively. Or you can specify them manually by providing --batch-kwargs in valid JSON format.

    Read more about specifying batches of data in the documentation: https://docs.greatexpectations.io/
    """
    display_not_implemented_message_and_exit()
    # Resolve the project directory from the CLI config location, then delegate.
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    _suite_edit(
        suite,
        datasource,
        config_location.get("directory"),
        jupyter,
        batch_kwargs,
        usage_event="cli.suite.edit",
    )
def get_data_context_from_config_file(self) -> DataContext:
    """Load and return the DataContext described by this object's config file location."""
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=self.config_file_location
    )
    return toolkit.load_data_context_with_error_handling(
        directory=config_location.get("directory"),
        from_cli_upgrade_command=False,
    )
def store(ctx):
    """Store operations"""
    # Resolve the config directory and load the context with CLI error handling.
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    context: DataContext = toolkit.load_data_context_with_error_handling(
        directory=config_location.get("directory"),
        from_cli_upgrade_command=False,
    )
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = context
def project_upgrade(ctx):
    """Upgrade a project after installing the next Great Expectations major version."""
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    # A truthy context means the project loaded cleanly in upgrade mode,
    # i.e. no further upgrade work is needed.
    if load_data_context_with_error_handling(
        directory=directory, from_cli_upgrade_command=True
    ):
        cli_message(
            "<green>Your project is up-to-date - no further upgrade is necessary.\n</green>"
        )
        sys.exit(0)
def project_upgrade(ctx):
    """Upgrade a project after installing the next Great Expectations major version."""
    cli_message("\nChecking project...")
    cli_message(SECTION_SEPARATOR)
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    # Guard clause: a truthy context means the upgrade succeeded.
    if load_data_context_with_error_handling(
        directory=config_location.get("directory"), from_cli_upgrade_command=True
    ):
        sys.exit(0)
    cli_message("<red>Error: Your project could not be upgraded.\n</red>")
    sys.exit(1)
def project_check_config(ctx):
    """Check a config for validity and help with migrations."""
    cli_message("Checking your config files for validity...\n")
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    config_ok, error_message, context = do_config_check(config_location.get("directory"))
    # A context is returned even for some invalid configs; record the check.
    if context:
        toolkit.send_usage_message(
            data_context=context, event="cli.project.check_config", success=True
        )
    if not config_ok:
        cli_message("Unfortunately, your config appears to be invalid:\n")
        cli_message(f"<red>{error_message}</red>")
        sys.exit(1)
    cli_message("<green>Your config file appears valid!</green>")
def datasource(ctx):
    """Datasource operations"""
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    context: DataContext = toolkit.load_data_context_with_error_handling(
        directory=config_location.get("directory"),
        from_cli_upgrade_command=False,
    )
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = context

    # Emit the "begin" usage event now; the matching "end" event name is
    # stored for the subcommand to send when it finishes.
    usage_stats_prefix = f"cli.datasource.{ctx.invoked_subcommand}"
    toolkit.send_usage_message(
        data_context=context,
        event=f"{usage_stats_prefix}.begin",
        success=True,
    )
    ctx.obj.usage_event_end = f"{usage_stats_prefix}.end"
def suite_new(ctx, suite, jupyter, batch_kwargs):
    """
    Create a new empty Expectation Suite.

    Edit in jupyter notebooks, or skip with the --no-jupyter flag
    """
    display_not_implemented_message_and_exit()
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    _suite_new(
        suite=suite,
        directory=config_location.get("directory"),
        empty=True,
        jupyter=jupyter,
        view=False,
        batch_kwargs=batch_kwargs,
        usage_event="cli.suite.new",
    )
def checkpoint(ctx):
    """
    Checkpoint operations

    A Checkpoint is a bundle of one or more batches of data with one or more Expectation Suites.

    A Checkpoint can be as simple as one batch of data paired with one Expectation Suite.

    A Checkpoint can be as complex as many batches of data across different datasources paired with one or more Expectation Suites each.
    """
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    context: DataContext = toolkit.load_data_context_with_error_handling(
        directory=config_location.get("directory"),
        from_cli_upgrade_command=False,
    )
    # TODO consider moving this all the way up in to the CLIState constructor
    ctx.obj.data_context = context
def test_parse_cli_config_file_location_posix_paths(tmp_path_factory):
    """
    What does this test and why?
    We want to parse posix paths into their directory and filename parts so
    that we can pass the directory to our data context constructor. We need to
    be able to do that with all versions of path that can be input. This tests
    for posix paths for files/dirs that don't exist and files/dirs that do.
    Other tests handle testing for windows support.
    """
    fixtures = [
        # lone filename
        {
            "input_path": "just_a_file.yml",
            "expected": {"directory": "", "filename": "just_a_file.yml"},
        },
        # absolute paths
        {
            "input_path": "/path/to/file/filename.yml",
            "expected": {"directory": "/path/to/file", "filename": "filename.yml"},
        },
        {
            "input_path": "/absolute/directory/ending/slash/",
            "expected": {
                "directory": "/absolute/directory/ending/slash/",
                "filename": None,
            },
        },
        {
            "input_path": "/absolute/directory/ending/no/slash",
            "expected": {
                "directory": "/absolute/directory/ending/no/slash",
                "filename": None,
            },
        },
        # relative paths
        {
            "input_path": "relative/path/to/file.yml",
            "expected": {"directory": "relative/path/to", "filename": "file.yml"},
        },
        {
            "input_path": "relative/path/to/directory/slash/",
            "expected": {
                "directory": "relative/path/to/directory/slash/",
                "filename": None,
            },
        },
        {
            "input_path": "relative/path/to/directory/no_slash",
            "expected": {
                "directory": "relative/path/to/directory/no_slash",
                "filename": None,
            },
        },
    ]

    # None of the paths exist yet, so every parse attempt must fail.
    for fixture in fixtures:
        with pytest.raises(ge_exceptions.ConfigNotFoundError):
            toolkit.parse_cli_config_file_location(fixture["input_path"])

    # Create the files under a temporary root and re-run the assertions.
    root_dir = str(tmp_path_factory.mktemp("posix"))
    for fixture in fixtures:
        expected = fixture["expected"]
        expected_dir = expected.get("directory")
        # Re-root absolute paths under the tmp directory.
        if expected_dir is not None and expected_dir.startswith("/"):
            expected_dir = expected_dir[1:]
        expected_filename = expected.get("filename")
        if expected_dir:
            test_directory = os.path.join(root_dir, expected_dir)
            os.makedirs(test_directory, exist_ok=True)
            if expected_filename:
                expected_filepath = os.path.join(test_directory, expected_filename)
                open(expected_filepath, "w").close()  # touch the file
                output = toolkit.parse_cli_config_file_location(expected_filepath)
                assert output == {
                    "directory": os.path.join(root_dir, expected_dir),
                    "filename": expected_filename,
                }
def test_parse_cli_config_file_location_windows_paths(tmp_path_factory):
    """
    What does this test and why?
    Since we are unable to test windows paths on our unix CI, this just tests
    that if a file doesn't exist we raise an error.

    Args:
        tmp_path_factory:

    Returns:
    """
    # The directory/filename splits these paths would parse into are never
    # checked here: none of the paths exist, so parsing must always raise.
    # (Raw strings end in a doubled backslash where a trailing slash is meant,
    # because a raw string cannot end in a single backslash.)
    windows_input_paths = [
        "just_a_file.yml",
        r"C:\absolute\windows\path\to\file.yml",
        r"C:\absolute\windows\directory\ending\slash\\",
        r"C:\absolute\windows\directory\ending\no_slash",
        r"relative\windows\path\to\file.yml",
        r"relative\windows\path\to\directory\slash\\",
        r"relative\windows\path\to\directory\no_slash",
    ]
    for input_path in windows_input_paths:
        with pytest.raises(ge_exceptions.ConfigNotFoundError):
            toolkit.parse_cli_config_file_location(input_path)
def suite_scaffold(ctx, suite, jupyter):
    """Scaffold a new Expectation Suite."""
    display_not_implemented_message_and_exit()
    config_location = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    )
    _suite_scaffold(suite, config_location.get("directory"), jupyter)
def init(ctx: click.Context, usage_stats: bool) -> None:
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, creates a project file, and appends to a `.gitignore` file.
    """
    # Resolve the working directory from the CLI config location; fall back to CWD.
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    if directory is None:
        directory = os.getcwd()
    target_directory = os.path.abspath(directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)
    if DataContext.does_config_exist_on_disk(ge_dir):
        # A project config already exists: warn, then check whether the full
        # project file structure is present.
        message = (
            f"""Warning. An existing `{DataContext.GE_YML}` was found here: {ge_dir}."""
        )
        warnings.warn(message)
        try:
            # Complete only when config, uncommitted dirs, and config variables all exist.
            project_file_structure_exists = (
                DataContext.does_config_exist_on_disk(ge_dir)
                and DataContext.all_uncommitted_directories_exist(ge_dir)
                and DataContext.config_variables_yml_exist(ge_dir)
            )
            if project_file_structure_exists:
                cli_message(PROJECT_IS_COMPLETE)
                sys.exit(0)
            else:
                # Prompt to modify the project to add missing files
                if not ctx.obj.assume_yes:
                    if not click.confirm(COMPLETE_ONBOARDING_PROMPT, default=True):
                        cli_message(RUN_INIT_AGAIN)
                        exit(0)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message(f"<red>{e.message}</red>")
            sys.exit(1)
        try:
            # Fill in whatever parts of the project scaffold are missing.
            DataContext.create(target_directory, usage_statistics_enabled=usage_stats)
            cli_message(ONBOARDING_COMPLETE)
        except DataContextError as e:
            cli_message(f"<red>{e.message}</red>")
            # TODO ensure this is covered by a test
            exit(5)
    else:
        # Fresh project: confirm with the user (unless --assume-yes) and scaffold.
        if not ctx.obj.assume_yes:
            if not click.confirm(LETS_BEGIN_PROMPT, default=True):
                cli_message(RUN_INIT_AGAIN)
                exit(0)
        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            send_usage_message(
                data_context=context,
                event=UsageStatsEvents.CLI_INIT_CREATE.value,
                success=True,
            )
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message(f"<red>{e}</red>")
    # Successful setup (either branch that did not exit above) ends here.
    cli_message(SECTION_SEPARATOR)
    cli_message(READY_FOR_CUSTOMIZATION)
    cli_message(HOW_TO_CUSTOMIZE)
    sys.exit(0)
def init(ctx, view, usage_stats):
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    display_not_implemented_message_and_exit()
    # Resolve the working directory from the CLI config location; fall back to CWD.
    directory = toolkit.parse_cli_config_file_location(
        config_file_location=ctx.obj.config_file_location
    ).get("directory")
    if directory is None:
        directory = os.getcwd()
    target_directory = os.path.abspath(directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)
    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(1)
        try:
            # Re-run create to fill in any missing scaffold pieces.
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            # TODO ensure this is covered by a test
            exit(5)
    else:
        # Fresh project: confirm with the user (unless --assume-yes) and scaffold.
        if not ctx.obj.assume_yes:
            if not click.confirm(LETS_BEGIN_PROMPT, default=True):
                cli_message(RUN_INIT_AGAIN)
                # TODO ensure this is covered by a test
                exit(0)
        try:
            context = DataContext.create(
                target_directory, usage_statistics_enabled=usage_stats
            )
            toolkit.send_usage_message(
                data_context=context, event="cli.init.create", success=True
            )
        except DataContextError as e:
            # TODO ensure this is covered by a test
            cli_message("<red>{}</red>".format(e))
    # Skip the rest of setup if --assume-yes flag is passed
    if ctx.obj.assume_yes:
        cli_message(SECTION_SEPARATOR)
        cli_message(SETUP_SUCCESS)
        sys.exit(0)
    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)
        else:
            # No suites yet: walk the user through datasource creation,
            # profiling a first suite, and building Data Docs.
            datasources = context.list_datasources()
            if len(datasources) == 0:
                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to configure a Datasource?", default=True
                ):
                    cli_message("Okay, bye!")
                    sys.exit(1)
                datasource_name, data_source_type = add_datasource_impl(
                    context, choose_one_data_asset=False
                )
                if not datasource_name:  # no datasource was created
                    sys.exit(1)
                datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]
                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to profile new Expectations for a single data asset within your new Datasource?",
                    default=True,
                ):
                    cli_message(
                        "Okay, exiting now. To learn more about Profilers, run great_expectations profile --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)
                # Profile a sample (limit 1000 rows) to seed the first suite.
                (
                    success,
                    suite_name,
                    profiling_results,
                ) = toolkit.create_expectation_suite(
                    context,
                    datasource_name=datasource_name,
                    additional_batch_kwargs={"limit": 1000},
                    flag_build_docs=False,
                    open_docs=False,
                )
                cli_message(SECTION_SEPARATOR)
                if not click.confirm(
                    "Would you like to build Data Docs?", default=True
                ):
                    cli_message(
                        "Okay, exiting now. To learn more about Data Docs, run great_expectations docs --help or visit docs.greatexpectations.io!"
                    )
                    sys.exit(1)
                build_docs(context, view=False)
                if not click.confirm(
                    "\nWould you like to view your new Expectations in Data Docs? This will open a new browser window.",
                    default=True,
                ):
                    cli_message(
                        "Okay, exiting now. You can view the site that has been created in a browser, or visit docs.greatexpectations.io for more information!"
                    )
                    sys.exit(1)
                toolkit.attempt_to_open_validation_results_in_data_docs(
                    context, profiling_results
                )
        cli_message(SECTION_SEPARATOR)
        cli_message(SETUP_SUCCESS)
        sys.exit(0)
    except (
        DataContextError,
        ge_exceptions.ProfilerError,
        OSError,
        SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)