def generate_basic_suites(
    kedro_context: KedroContext,
    ge_context: DataContext,
    empty=False,
    replace=False,
    batch_kwargs=None,
):
    """Create a basic expectation suite for every dataset in the Kedro catalog.

    For each catalog entry, a suite is skipped when one with the generated
    name already exists (unless ``replace`` is truthy) or when no matching
    datasource is registered in the GE context. Otherwise either an empty
    suite is created (``empty=True``) or the data asset is profiled with
    ``BasicSuiteBuilderProfiler``.
    """
    from great_expectations.profile import BasicSuiteBuilderProfiler

    extra_batch_kwargs = {} if batch_kwargs is None else batch_kwargs
    catalog = kedro_context.catalog
    known_datasources = {ds["name"] for ds in ge_context.list_datasources()}

    for ds_name in catalog.list():
        target_suite = generate_basic_suite_name(ds_name)
        # Queried inside the loop on purpose: creating a suite may change the
        # result for subsequent iterations.
        suite_exists = target_suite in ge_context.list_expectation_suite_names()
        if suite_exists and not replace:
            continue

        source_name = generate_datasource_name(ds_name)
        if source_name not in known_datasources:
            continue

        dataset = catalog._get_dataset(ds_name)
        file_path = str(dataset._filepath)
        asset_name = os.path.splitext(os.path.basename(file_path))[0]
        batch_spec = {
            "datasource": source_name,
            "data_asset_name": asset_name,
            "path": file_path,
            "reader_options": dataset._load_args,
        }
        # UTC timestamp used as the profiling run identifier.
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime(
            "%Y%m%dT%H%M%S.%fZ"
        )

        if empty:
            create_empty_suite(ge_context, target_suite, batch_spec)
        else:
            ge_context.profile_data_asset(
                source_name,
                batch_kwargs_generator_name="path",
                data_asset_name=asset_name,
                batch_kwargs=batch_spec,
                profiler=BasicSuiteBuilderProfiler,
                profiler_configuration="demo",
                expectation_suite_name=target_suite,
                run_id=timestamp,
                additional_batch_kwargs=extra_batch_kwargs,
            )
def test_suite_demo_enter_existing_suite_name_as_arg(
    mock_webbrowser,
    mock_subprocess,
    caplog,
    data_context_parameterized_expectation_suite,
):
    """
    Invoking "suite demo" with --suite set to an already-existing suite name
    must:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """
    root_dir = data_context_parameterized_expectation_suite.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))
    context = DataContext(root_dir)

    taken_name = "my_dag_node.default"
    assert context.list_expectation_suite_names() == [taken_name]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "suite",
            "demo",
            "-d",
            root_dir,
            "--suite",
            taken_name,
            "--no-view",
        ],
        catch_exceptions=False,
    )
    output = result.stdout

    assert result.exit_code == 1
    for expected in (
        f"An expectation suite named `{taken_name}` already exists.",
        f"If you intend to edit the suite please use `great_expectations suite edit {taken_name}`",
    ):
        assert expected in output

    # Neither Data Docs nor jupyter should have been launched.
    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0
    assert_no_logging_messages_or_tracebacks(caplog, result)
def suite_list(directory):
    """Lists available Expectation Suites.

    Prints each suite name, emits a usage-analytics event (success or
    failure), and re-raises any unexpected error after reporting it.
    """
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    try:
        suite_names = [
            " - <cyan>{}</cyan>".format(suite_name)
            for suite_name in context.list_expectation_suite_names()
        ]
        # Idiomatic emptiness check (was `len(suite_names) == 0`).
        if not suite_names:
            cli_message("No Expectation Suites found")
            send_usage_message(
                data_context=context, event="cli.suite.list", success=True
            )
            return

        # `else` instead of a second independent `if`: guarantees
        # list_intro_string is always bound before use.
        if len(suite_names) == 1:
            list_intro_string = "1 Expectation Suite found:"
        else:
            list_intro_string = "{} Expectation Suites found:".format(len(suite_names))

        cli_message_list(suite_names, list_intro_string)
        send_usage_message(
            data_context=context, event="cli.suite.list", success=True
        )
    except Exception:
        send_usage_message(
            data_context=context, event="cli.suite.list", success=False
        )
        # Bare `raise` re-raises the active exception with its original
        # traceback (was `raise e`, which adds this frame to the trace).
        raise
def suite_list(directory):
    """Lists available expectation suites."""
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    names = context.list_expectation_suite_names()
    count = len(names)

    if count == 0:
        cli_message("No expectation suites found")
        return

    # count >= 1 here, so exactly one header applies.
    header = (
        "1 expectation suite found:"
        if count == 1
        else "{} expectation suites found:".format(count)
    )
    cli_message(header)
    for name in names:
        cli_message("\t{}".format(name))
def suite_list(directory):
    """Lists available expectation suites.

    Handles a missing config and a v0.7-style config by printing guidance
    instead of raising; otherwise prints every suite name.
    """
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # BUG FIX: `context` is never bound when DataContext(directory)
        # raises, so the previous `context.root_directory` access raised
        # UnboundLocalError here. Fall back to the user-supplied directory.
        # NOTE(review): confirm _offer_to_install_new_template accepts the
        # raw directory argument where it previously expected root_directory.
        _offer_to_install_new_template(err, directory)
        return

    suite_names = context.list_expectation_suite_names()
    if not suite_names:
        cli_message("No expectation suites available")
        return

    # Exactly one of the headers applies since len >= 1 here; `else`
    # guarantees one is always printed.
    if len(suite_names) == 1:
        cli_message("1 expectation suite available:")
    else:
        cli_message("{} expectation suites available:".format(len(suite_names)))
    for name in suite_names:
        cli_message("\t{}".format(name))
def test_suite_new_empty_with_no_jupyter(
    mock_webbroser,
    mock_subprocess,
    caplog,
    data_context_parameterized_expectation_suite,
    filesystem_csv_2,
):
    """
    Running "suite new --no-jupyter" should:
    - make an empty suite
    - NOT open jupyter
    - NOT open data docs
    """
    root_dir = data_context_parameterized_expectation_suite.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))
    runner = CliRunner(mix_stderr=False)
    csv = os.path.join(filesystem_csv_2, "f1.csv")

    # TODO this test must be updated to remove the --empty flag in the next major release
    result = runner.invoke(
        cli,
        ["suite", "new", "-d", root_dir, "--empty", "--suite", "foo", "--no-jupyter"],
        input=f"{csv}\n",
        catch_exceptions=False,
    )
    output = result.stdout

    assert result.exit_code == 0
    assert "Enter the path" in output
    # None of the interactive/profiling messages should appear.
    for absent in (
        "Name the new expectation suite",
        "Great Expectations will choose a couple of columns and generate expectations",
        "Generating example Expectation Suite...",
        "The following Data Docs sites were built",
        "open a notebook for you now",
    ):
        assert absent not in output
    assert (
        "Great Expectations will create a new Expectation Suite 'foo' and store it here"
        in output
    )

    suite_file = os.path.join(root_dir, "expectations", "foo.json")
    assert os.path.isfile(suite_file)
    notebook_file = os.path.join(root_dir, "uncommitted", "edit_foo.ipynb")
    assert os.path.isfile(notebook_file)

    context = DataContext(root_dir)
    assert "foo" in context.list_expectation_suite_names()
    suite = context.get_expectation_suite("foo")
    assert suite.expectations == []
    citations = suite.get_citations()
    citations[0].pop("citation_date")
    assert citations[0] == {
        "batch_kwargs": {
            "data_asset_name": "f1",
            "datasource": "mydatasource",
            "path": csv,
            "reader_method": "read_csv",
        },
        "batch_markers": None,
        "batch_parameters": None,
        "comment": "New suite added via CLI",
    }

    assert mock_subprocess.call_count == 0
    assert mock_webbroser.call_count == 0
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_demo_answer_suite_name_prompts_with_name_of_existing_suite(
    mock_webbrowser,
    mock_subprocess,
    caplog,
    data_context_parameterized_expectation_suite,
    filesystem_csv_2,
):
    """
    Calling "suite demo" without the suite name argument should:
    - prompt for the name of the expectation suite to create; answering with
      a name that already exists shows an error and re-prompts until a free
      name is given
    - create an example suite
    - NOT open jupyter
    - open Data Docs at the new example suite page
    """
    root_dir = data_context_parameterized_expectation_suite.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))
    runner = CliRunner(mix_stderr=False)
    csv_path = os.path.join(filesystem_csv_2, "f1.csv")

    taken_name = "my_dag_node.default"
    context = DataContext(root_dir)
    assert context.list_expectation_suite_names() == [taken_name]

    # First answer the path prompt, then the taken name (rejected),
    # then a fresh name, then accept the default for the final prompt.
    result = runner.invoke(
        cli,
        ["suite", "demo", "-d", root_dir],
        input=f"{csv_path}\n{taken_name}\nmy_new_suite\n\n",
        catch_exceptions=False,
    )
    output = result.stdout

    assert result.exit_code == 0
    for expected in (
        f"An expectation suite named `{taken_name}` already exists.",
        f"If you intend to edit the suite please use `great_expectations suite edit {taken_name}`",
        "Enter the path",
        "Name the new Expectation Suite [f1.warning]",
        "Great Expectations will choose a couple of columns and generate expectations",
        "Generating example Expectation Suite...",
        "Building",
        "The following Data Docs sites will be built",
        "Great Expectations will store these expectations in a new Expectation Suite 'my_new_suite' here",
    ):
        assert expected in output
    assert "open a notebook for you now" not in output

    suite_file = os.path.join(root_dir, "expectations", "my_new_suite.json")
    assert os.path.isfile(suite_file)

    assert mock_subprocess.call_count == 0
    assert mock_webbrowser.call_count == 1
    docs_dir = os.path.join(
        root_dir, "uncommitted/data_docs/local_site/validations/my_new_suite/"
    )
    assert f"file://{docs_dir}" in mock_webbrowser.call_args[0][0]

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_new_empty_suite_creates_empty_suite(
    mock_webbroser, mock_subprocess, caplog, data_context, filesystem_csv_2
):
    """
    Running "suite new --empty" should:
    - make an empty suite
    - open jupyter
    - NOT open data docs
    """
    root_dir = data_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))
    os.chdir(root_dir)
    runner = CliRunner(mix_stderr=False)
    csv = os.path.join(filesystem_csv_2, "f1.csv")

    result = runner.invoke(
        cli,
        ["suite", "new", "-d", root_dir, "--empty", "--suite", "foo"],
        input=f"{csv}\n",
        catch_exceptions=False,
    )
    output = result.stdout

    assert result.exit_code == 0
    assert "Enter the path" in output
    # Profiling/demo messaging must be absent for an empty suite.
    for absent in (
        "Name the new expectation suite",
        "Great Expectations will choose a couple of columns and generate expectations",
        "Generating example Expectation Suite...",
        "The following Data Docs sites were built",
    ):
        assert absent not in output
    assert "A new Expectation suite 'foo' was added to your project" in output
    assert (
        "Because you requested an empty suite, we'll open a notebook for you now to edit it!"
        in output
    )

    suite_file = os.path.join(root_dir, "expectations", "foo.json")
    assert os.path.isfile(suite_file)
    notebook_file = os.path.join(root_dir, "uncommitted", "foo.ipynb")
    assert os.path.isfile(notebook_file)

    context = DataContext(root_dir)
    assert "foo" in context.list_expectation_suite_names()
    suite = context.get_expectation_suite("foo")
    assert suite.expectations == []
    citations = suite.get_citations()
    citations[0].pop("citation_date")
    assert citations[0] == {
        "batch_kwargs": {
            "datasource": "mydatasource",
            "path": csv,
            "reader_method": "read_csv",
        },
        "batch_markers": None,
        "batch_parameters": None,
        "comment": "New suite added via CLI",
    }

    # Jupyter must be launched exactly once with the new notebook.
    assert mock_subprocess.call_count == 1
    jupyter_args = mock_subprocess.call_args[0][0]
    assert jupyter_args[0] == "jupyter"
    assert jupyter_args[1] == "notebook"
    assert notebook_file in jupyter_args[2]

    assert mock_webbroser.call_count == 0
    assert_no_logging_messages_or_tracebacks(caplog, result)