Example #1
def test_init_on_existing_project_with_datasource_with_existing_suite_offer_to_build_docs_answer_yes(
    mock_webbrowser,
    caplog,
    initialized_sqlite_project,
):
    project_dir = initialized_sqlite_project

    runner = CliRunner(mix_stderr=False)
    with pytest.warns(
            UserWarning,
            match="Warning. An existing `great_expectations.yml` was found"):
        result = runner.invoke(
            cli,
            ["init", "-d", project_dir],
            input="\n\n",
            catch_exceptions=False,
        )
    stdout = result.stdout

    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert ("{}/great_expectations/uncommitted/data_docs/local_site/index.html"
            .format(project_dir) in mock_webbrowser.call_args[0][0])

    assert "Error: invalid input" not in stdout

    assert "Always know what to expect from your data" in stdout
    assert "This looks like an existing project that" in stdout
    assert "appears complete" in stdout
    assert "Would you like to build & view this project's Data Docs" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
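# Every example in this section ends with `assert_no_logging_messages_or_tracebacks`,
# whose definition is not shown. The sketch below is an assumption for illustration,
# not the actual implementation (the real helper may inspect log levels and stderr
# more carefully); it captures the two checks the tests appear to rely on.
def assert_no_logging_messages_or_tracebacks_sketch(
    my_caplog, click_result, allowed_deprecation_message=None
):
    # No log records should have been captured, apart from an optionally
    # allowed deprecation message.
    messages = [record.getMessage() for record in my_caplog.records]
    if allowed_deprecation_message:
        messages = [m for m in messages if allowed_deprecation_message not in m]
    assert messages == []
    # The CLI output should never contain a Python traceback.
    assert "Traceback" not in click_result.output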
Example #2
def test_project_upgrade_already_up_to_date(v10_project_directory, caplog):
    # test great_expectations project upgrade command with project with config_version 2

    # copy v2 yml
    shutil.copy(
        file_relative_path(
            __file__,
            "../../../test_fixtures/upgrade_helper/great_expectations_v2.yml"),
        os.path.join(v10_project_directory, "great_expectations.yml"),
    )

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["project", "upgrade", "-d", v10_project_directory],
        input="\n",
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert "Checking project..." in stdout
    assert "Your project is up-to-date - no further upgrade is necessary." in stdout
    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
Example #3
def test_cli_datasource_new(caplog, empty_data_context, filesystem_csv_2):
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    assert context.list_datasources() == []

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["datasource", "new", "-d", project_root_dir],
        input="1\n1\n%s\nmynewsource\n" % str(filesystem_csv_2),
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert "What data would you like Great Expectations to connect to?" in stdout
    assert "What are you processing your files with?" in stdout
    assert "Give your new Datasource a short name." in stdout
    assert "A new datasource 'mynewsource' was added to your project." in stdout

    assert result.exit_code == 0

    config_path = os.path.join(project_root_dir, DataContext.GE_YML)
    with open(config_path) as f:
        config = yaml.load(f)
    datasources = config["datasources"]
    assert "mynewsource" in datasources.keys()
    data_source_class = datasources["mynewsource"]["data_asset_type"][
        "class_name"]
    assert data_source_class == "PandasDataset"
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_list_with_no_checkpoints_with_ge_config_v2(
        mock_emit, caplog, empty_data_context_stats_enabled):
    context = empty_data_context_stats_enabled
    root_dir = context.root_directory
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint list -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert result.exit_code == 0
    assert "No checkpoints found." in stdout
    assert "Use the command `great_expectations checkpoint new` to create one" in stdout

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call({
            "event_payload": {},
            "event": "data_context.__init__",
            "success": True
        }),
        mock.call({
            "event": "cli.checkpoint.list",
            "event_payload": {
                "api_version": "v2"
            },
            "success": True,
        }),
    ]

    assert_no_logging_messages_or_tracebacks(caplog, result)
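# Note: the `mock_emit` parameter used in the usage-statistics assertions above is
# not defined in these examples. It is presumably a mock patched over the usage
# statistics emit method, along the lines of this assumed sketch:
#
#     @mock.patch(
#         "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit"
#     )
#     def test_checkpoint_list_with_no_checkpoints_with_ge_config_v2(mock_emit, ...):
#         ...
#
# With such a patch in place, `mock_emit.call_args_list` records every usage
# statistics event the CLI would have sent.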
def test_validation_operator_run_noninteractive_validation_config_file_does_not_exist(
    caplog,
    data_context_parameterized_expectation_suite_no_checkpoint_store,
    filesystem_csv_2,
):
    """
    Non-interactive mode. Use the --validation_config_file argument to pass the path
    to a validation config file that does not exist.
    """
    not_so_empty_data_context = (
        data_context_parameterized_expectation_suite_no_checkpoint_store)
    root_dir = not_so_empty_data_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    validation_config_file_path = os.path.join(root_dir, "uncommitted",
                                               "validation_config_1.json")

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "validation-operator",
            "run",
            "-d",
            root_dir,
            "--validation_config_file",
            validation_config_file_path,
        ],
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert "Failed to process the --validation_config_file argument" in stdout
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_run_interactive_golden_path(
        caplog, data_context_simple_expectation_suite, filesystem_csv_2):
    """
    Interactive mode golden path - pass an existing suite name and an existing validation
    operator name, select an existing file.
    """
    not_so_empty_data_context = data_context_simple_expectation_suite
    root_dir = not_so_empty_data_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    runner = CliRunner(mix_stderr=False)
    csv_path = os.path.join(filesystem_csv_2, "f1.csv")
    result = runner.invoke(
        cli,
        [
            "validation-operator",
            "run",
            "-d",
            root_dir,
            "--name",
            "default",
            "--suite",
            "default",
        ],
        input=f"{csv_path}\n",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert "Validation failed" in stdout
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_datasource_profile_answering_no(empty_data_context,
                                             titanic_sqlite_db, caplog):
    """
    When the datasource profile command is called without additional arguments,
    it must prompt the user for confirmation (y/n) before profiling.
    We verify that it does so and that it respects the user's "no".
    """
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    datasource_name = "wow_a_datasource"
    context = _add_datasource_and_credentials_to_context(
        context, datasource_name, titanic_sqlite_db)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "datasource", "profile", datasource_name, "-d", project_root_dir,
            "--no-view"
        ],
        input="n\n",
        catch_exceptions=False,
    )

    stdout = result.output
    assert result.exit_code == 0
    assert "Profiling 'wow_a_datasource'" in stdout
    assert "Skipping profiling for now." in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
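# `_add_datasource_and_credentials_to_context` is a helper whose definition is not
# shown in these examples. Judging from the datasource listing asserted in the
# `datasource list` test later in this section, it plausibly looks like the assumed
# sketch below (not the actual implementation):
def _add_datasource_and_credentials_to_context_sketch(context, datasource_name, sqlite_engine):
    # Persist the connection credentials as a config variable, then register a
    # SqlAlchemyDatasource that references them.
    credentials = {"url": str(sqlite_engine.url)}
    context.save_config_variable(datasource_name, credentials)
    context.add_datasource(
        datasource_name,
        module_name="great_expectations.datasource",
        class_name="SqlAlchemyDatasource",
        data_asset_type={"class_name": "SqlAlchemyDataset"},
        batch_kwargs_generators={"default": {"class_name": "TableBatchKwargsGenerator"}},
        credentials="${" + datasource_name + "}",
    )
    return context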
def test_cli_datasource_profile_on_empty_database(empty_data_context,
                                                  empty_sqlite_db, caplog):
    """
    We run the datasource profile command against an empty database (no tables).
    This means that no generator can "see" a list of available data assets.
    The command must exit with an error message saying that no generator can see
    any assets.
    """
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    datasource_name = "wow_a_datasource"
    context = _add_datasource_and_credentials_to_context(
        context, datasource_name, empty_sqlite_db)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "datasource", "profile", datasource_name, "-d", project_root_dir,
            "--no-view"
        ],
        input="n\n",
        catch_exceptions=False,
    )

    stdout = result.output
    assert result.exit_code == 1

    assert "Profiling 'wow_a_datasource'" in stdout
    assert "No batch kwargs generators can list available data assets" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_list_with_one_validation_operator(
        caplog, filesystem_csv_data_context_with_validation_operators):
    project_dir = filesystem_csv_data_context_with_validation_operators.root_directory
    runner = CliRunner(mix_stderr=False)

    expected_result = """Heads up! This feature is Experimental. It may change. Please give us your feedback!
1 Validation Operator found:

 - name: action_list_operator
   class_name: ActionListValidationOperator
   action_list: store_validation_result (StoreValidationResultAction) => store_evaluation_params (StoreEvaluationParametersAction) => update_data_docs (UpdateDataDocsAction)"""

    result = runner.invoke(
        cli,
        "validation-operator list -d {}".format(project_dir),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    # _capture_ansi_codes_to_file(result)
    assert result.output.strip() == expected_result

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
def test_validation_operator_run_interactive_pass_non_existing_operator_name(
    caplog,
    data_context_parameterized_expectation_suite_no_checkpoint_store,
    filesystem_csv_2,
):
    """
    Interactive mode: pass an existing suite name and a non-existing validation
    operator name, select an existing file.
    """
    not_so_empty_data_context = (
        data_context_parameterized_expectation_suite_no_checkpoint_store)
    root_dir = not_so_empty_data_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    runner = CliRunner(mix_stderr=False)
    csv_path = os.path.join(filesystem_csv_2, "f1.csv")
    result = runner.invoke(
        cli,
        [
            "validation-operator",
            "run",
            "-d",
            root_dir,
            "--name",
            "this_val_op_does_not_exist",
            "--suite",
            "my_dag_node.default",
        ],
        input=f"{csv_path}\n",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert "Could not find a validation operator" in stdout
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
Example #11
def test_cli_init_on_complete_existing_project_all_uncommitted_dirs_exist(
    mock_webbrowser,
    caplog,
    tmp_path_factory,
):
    """
    This test walks through the onboarding experience.

    The user re-runs init on a complete existing project in which all of the
    uncommitted directories already exist.
    """
    root_dir = tmp_path_factory.mktemp("hiya")
    root_dir = str(root_dir)
    os.makedirs(os.path.join(root_dir, "data"))
    data_folder_path = os.path.join(root_dir, "data")
    data_path = os.path.join(root_dir, "data", "Titanic.csv")
    fixture_path = file_relative_path(
        __file__, os.path.join("..", "..", "test_sets", "Titanic.csv")
    )
    shutil.copy(fixture_path, data_path)

    # Create a new project from scratch that we will use for the test in the next step

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", root_dir],
        input="\n\n1\n1\n{}\n\n\n\n2\n{}\n\n\n\n".format(
            data_folder_path, data_path
        ),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/Titanic/warning/".format(
            root_dir
        )
        in mock_webbrowser.call_args[0][0]
    )

    # Now the test begins - rerun the init on an existing project

    runner = CliRunner(mix_stderr=False)
    with pytest.warns(
        UserWarning, match="Warning. An existing `great_expectations.yml` was found"
    ):
        result = runner.invoke(
            cli, ["init", "-d", root_dir], input="n\n", catch_exceptions=False
        )
    stdout = result.stdout
    assert mock_webbrowser.call_count == 1

    assert result.exit_code == 0
    assert "This looks like an existing project that" in stdout
    assert "appears complete" in stdout
    assert "ready to roll" in stdout
    assert "Would you like to build & view this project's Data Docs" in stdout
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_run_on_non_existent_validation_operator_with_ge_config_v2(
        mock_emit, caplog,
        titanic_data_context_stats_enabled_config_version_2):
    context = titanic_data_context_stats_enabled_config_version_2
    root_dir = context.root_directory
    csv_path = os.path.join(root_dir, "..", "data", "Titanic.csv")

    suite = context.create_expectation_suite("iceberg")
    context.save_expectation_suite(suite)
    assert context.list_expectation_suite_names() == ["iceberg"]
    mock_emit.reset_mock()

    checkpoint_file_path = os.path.join(
        context.root_directory,
        DataContextConfigDefaults.CHECKPOINTS_BASE_DIRECTORY.value,
        "bad_operator.yml",
    )
    bad = {
        "validation_operator_name":
        "foo",
        "batches": [
            {
                "batch_kwargs": {
                    "path": csv_path,
                    "datasource": "mydatasource",
                    "reader_method": "read_csv",
                },
                "expectation_suite_names": ["iceberg"],
            },
        ],
    }
    _write_checkpoint_dict_to_file(bad, checkpoint_file_path)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint run bad_operator -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert result.exit_code == 1

    assert (
        f"No validation operator `foo` was found in your project. Please verify this in your great_expectations.yml"
        in stdout)
    usage_emits = mock_emit.call_args_list

    assert mock_emit.call_count == 3
    assert usage_emits[0][0][0]["success"] is True
    assert usage_emits[1][0][0]["success"] is False
    assert usage_emits[2][0][0]["success"] is False

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
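# `_write_checkpoint_dict_to_file` is another helper that is not shown. Since v2
# checkpoints live in `.yml` files under the checkpoints directory, it presumably
# just dumps the dict as YAML, roughly like this assumed sketch:
def _write_checkpoint_dict_to_file_sketch(checkpoint_dict, checkpoint_file_path):
    # Serialize the checkpoint config so the CLI can find it by name.
    with open(checkpoint_file_path, "w") as f:
        yaml.dump(checkpoint_dict, f)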
def initialized_sqlite_project(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file, sa
):
    """This is an initialized project through the CLI."""
    project_dir = str(tmp_path_factory.mktemp("my_rad_project"))

    engine = sa.create_engine(
        "sqlite:///{}".format(titanic_sqlite_db_file), pool_recycle=3600
    )

    inspector = sa.inspect(engine)

    # get the default schema and table for testing
    schemas = inspector.get_schema_names()
    default_schema = schemas[0]

    tables = inspector.get_table_names(schema=default_schema)
    default_table = tables[0]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n2\n6\ntitanic\n{url}\n\n\n1\n{schema}\n{table}\nwarning\n\n\n\n".format(
            url=engine.url, schema=default_schema, table=default_table
        ),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )

    assert_no_logging_messages_or_tracebacks(caplog, result)

    context = DataContext(os.path.join(project_dir, DataContext.GE_DIR))
    assert isinstance(context, DataContext)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == [
        {
            "class_name": "SqlAlchemyDatasource",
            "name": "titanic",
            "module_name": "great_expectations.datasource",
            "credentials": {"url": str(engine.url)},
            "data_asset_type": {
                "class_name": "SqlAlchemyDataset",
                "module_name": "great_expectations.dataset",
            },
        }
    ]
    return project_dir
Example #14
def test_cli_init_on_new_project_with_broken_excel_file_without_trying_again(
    caplog, tmp_path_factory
):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    os.makedirs(os.path.join(project_dir, "data"))
    data_folder_path = os.path.join(project_dir, "data")
    data_path = os.path.join(project_dir, "data", "broken_excel_file.xls")
    fixture_path = file_relative_path(__file__, "../../test_sets/broken_excel_file.xls")
    shutil.copy(fixture_path, data_path)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input=f"\n\n1\n1\n{data_folder_path}\n\n\n\n2\n{data_path}\nn\n",
        catch_exceptions=False,
    )
    stdout = result.output

    assert len(stdout) < 6000, "CLI output is unreasonably long."
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "What are you processing your files with" in stdout
    assert (
        "Enter the path of a data file (relative or absolute, s3a:// and gs:// paths are ok too)"
        in stdout
    )
    assert "Cannot load file." in stdout
    assert (
        "- Please check the file and try again or select a different data file."
        in stdout
    )
    assert (
        "- Error: Excel file format cannot be determined, you must specify an engine manually."
        in stdout
    ) or (
        "Error: Unsupported format, or corrupt file: Expected BOF record; found b'PRODUCTI'"
        in stdout
    )
    assert "Try again? [Y/n]:" in stdout
    assert (
        "We have saved your setup progress. When you are ready, run great_expectations init to continue."
        in stdout
    )

    assert os.path.isdir(os.path.join(project_dir, "great_expectations"))
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    with open(config_path) as f:
        config = yaml.load(f)
    data_source_class = config["datasources"]["data__dir"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "PandasDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_script_happy_path_executable_failed_validation_with_ge_config_v2(
        caplog,
        titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled):
    """
    We call the "checkpoint script" command on a project with a checkpoint.

    The command should:
    - create the script (note output is tested in other tests)

    When run, the script should:
    - execute
    - return a 1 status code
    - print a failure message
    """
    context = titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
    root_dir = context.root_directory
    # mangle the csv
    csv_path = os.path.join(context.root_directory, "..", "data",
                            "Titanic.csv")
    with open(csv_path, "w") as f:
        f.write("foo,bar\n1,2\n")

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint script my_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )

    script_path = os.path.abspath(
        os.path.join(root_dir, context.GE_UNCOMMITTED_DIR,
                     "run_my_checkpoint.py"))
    assert os.path.isfile(script_path)

    # In travis on osx, python may not execute from the build dir
    cmdstring = f"python {script_path}"
    if os.environ.get("TRAVIS_OS_NAME") == "osx":
        build_dir = os.environ.get("TRAVIS_BUILD_DIR")
        print(os.listdir(build_dir))
        cmdstring = f"python3 {script_path}"
    print("about to run: " + cmdstring)
    print(os.curdir)
    print(os.listdir(os.curdir))
    print(os.listdir(os.path.abspath(os.path.join(root_dir, ".."))))

    status, output = subprocess.getstatusoutput(cmdstring)
    print(f"\n\nScript exited with code: {status} and output:\n{output}")
    assert status == 1
    assert "Validation failed!" in output
def test_checkpoint_new_specify_datasource_with_ge_config_v2(
    mock_emit,
    caplog,
    titanic_data_context_stats_enabled_config_version_2,
    titanic_expectation_suite,
):
    context = titanic_data_context_stats_enabled_config_version_2
    root_dir = context.root_directory
    assert context.list_checkpoints() == []
    context.save_expectation_suite(titanic_expectation_suite)
    assert context.list_expectation_suite_names() == ["Titanic.warning"]
    mock_emit.reset_mock()

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint new passengers Titanic.warning -d {root_dir} --datasource mydatasource",
        input="1\n1\n",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert result.exit_code == 0
    assert "A checkpoint named `passengers` was added to your project" in stdout

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call({
            "event_payload": {},
            "event": "data_context.__init__",
            "success": True
        }),
        mock.call({
            "event": "cli.checkpoint.new",
            "event_payload": {
                "api_version": "v2"
            },
            "success": True,
        }),
    ]
    expected_checkpoint = os.path.join(
        root_dir,
        DataContextConfigDefaults.CHECKPOINTS_BASE_DIRECTORY.value,
        "passengers.yml",
    )
    assert os.path.isfile(expected_checkpoint)

    # Create a fresh context for additional assertions
    context = DataContext(root_dir)
    assert context.list_checkpoints() == ["passengers"]

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_validation_operator_run_noninteractive_validation_config_file_is_misconfigured(
    caplog,
    data_context_parameterized_expectation_suite_no_checkpoint_store,
    filesystem_csv_2,
):
    """
    Non-interactive mode. Use the --validation_config_file argument to pass the path
    to a validation config file that is misconfigured: one of the batches does not
    have an expectation_suite_names attribute.
    """
    not_so_empty_data_context = (
        data_context_parameterized_expectation_suite_no_checkpoint_store)
    root_dir = not_so_empty_data_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    csv_path = os.path.join(filesystem_csv_2, "f1.csv")

    validation_config = {
        "validation_operator_name":
        "default",
        "batches": [{
            "batch_kwargs": {
                "path": csv_path,
                "datasource": "mydatasource",
                "reader_method": "read_csv",
            },
            "wrong_attribute_expectation_suite_names":
            ["my_dag_node.default1"],
        }],
    }
    validation_config_file_path = os.path.join(root_dir, "uncommitted",
                                               "validation_config_1.json")
    with open(validation_config_file_path, "w") as f:
        json.dump(validation_config, f)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "validation-operator",
            "run",
            "-d",
            root_dir,
            "--validation_config_file",
            validation_config_file_path,
        ],
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert (
        "is misconfigured: Each batch must have a list of expectation suite names"
        in stdout)
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
Example #18
def test_project_check_on_missing_ge_dir_guides_user_to_fix(
        caplog, tmp_path_factory):
    project_dir = str(tmp_path_factory.mktemp("empty_dir"))
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, ["project", "check-config", "-d", project_dir],
                           catch_exceptions=False)
    stdout = result.output
    assert "Checking your config files for validity" in stdout
    assert "Unfortunately, your config appears to be invalid" in stdout
    assert "Error: No great_expectations directory was found here!" in stdout
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
Example #19
def test_project_check_on_valid_project_says_so(caplog, titanic_data_context):
    project_dir = titanic_data_context.root_directory
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, ["project", "check-config", "-d", project_dir],
                           catch_exceptions=False)
    assert "Checking your config files for validity" in result.output
    assert "Your config file appears valid" in result.output
    assert result.exit_code == 0
    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
Example #20
def test_project_check_on_project_with_missing_config_file_guides_user(
        caplog, titanic_data_context):
    project_dir = titanic_data_context.root_directory
    # Remove the config file.
    os.remove(os.path.join(project_dir, "great_expectations.yml"))

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, ["project", "check-config", "-d", project_dir],
                           catch_exceptions=False)
    assert result.exit_code == 1
    assert "Checking your config files for validity" in result.output
    assert "Unfortunately, your config appears to be invalid" in result.output
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_script_happy_path_generates_script_with_ge_config_v2(
    mock_emit, caplog, titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
):
    context = titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
    root_dir = context.root_directory
    mock_emit.reset_mock()

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint script my_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert result.exit_code == 0
    assert (
        "A python script was created that runs the checkpoint named: `my_checkpoint`"
        in stdout
    )
    assert (
        "The script is located in `great_expectations/uncommitted/run_my_checkpoint.py`"
        in stdout
    )
    assert (
        "The script can be run with `python great_expectations/uncommitted/run_my_checkpoint.py`"
        in stdout
    )

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call(
            {"event_payload": {}, "event": "data_context.__init__", "success": True}
        ),
        mock.call(
            {
                "event": "cli.checkpoint.script",
                "event_payload": {"api_version": "v2"},
                "success": True,
            }
        ),
    ]
    expected_script = os.path.join(
        root_dir, context.GE_UNCOMMITTED_DIR, "run_my_checkpoint.py"
    )
    assert os.path.isfile(expected_script)

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_checkpoint_script_raises_error_if_python_file_exists_with_ge_config_v2(
        mock_emit, caplog,
        titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled):
    context = titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
    root_dir = context.root_directory
    assert context.list_checkpoints() == ["my_checkpoint"]
    script_path = os.path.join(root_dir, context.GE_UNCOMMITTED_DIR,
                               "run_my_checkpoint.py")
    with open(script_path, "w") as f:
        f.write("script here")
    assert os.path.isfile(script_path)
    mock_emit.reset_mock()

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint script my_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert (
        "Warning! A script named run_my_checkpoint.py already exists and this command will not overwrite it."
        in stdout)
    assert result.exit_code == 1

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call({
            "event_payload": {},
            "event": "data_context.__init__",
            "success": True
        }),
        mock.call({
            "event": "cli.checkpoint.script",
            "event_payload": {
                "api_version": "v2"
            },
            "success": False,
        }),
    ]

    # assert the script has original contents
    with open(script_path) as f:
        assert f.read() == "script here"

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_checkpoint_run_happy_path_with_failed_validation_with_ge_config_v2(
        mock_emit, caplog,
        titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled):
    context = titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
    root_dir = context.root_directory
    # mangle the csv
    csv_path = os.path.join(context.root_directory, "..", "data",
                            "Titanic.csv")
    with open(csv_path, "w") as f:
        f.write("foo,bar\n1,2\n")

    mock_emit.reset_mock()

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint run my_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    stdout = result.stdout
    print(stdout)
    assert result.exit_code == 1
    assert "Validation failed!" in stdout

    assert mock_emit.call_count == 5
    usage_emits = mock_emit.call_args_list
    assert usage_emits[0] == mock.call({
        "event_payload": {},
        "event": "data_context.__init__",
        "success": True
    })
    assert usage_emits[1][0][0]["event"] == "data_asset.validate"
    assert usage_emits[1][0][0]["success"] is True

    assert usage_emits[2][0][0]["event"] == "data_context.build_data_docs"
    assert usage_emits[2][0][0]["success"] is True

    assert usage_emits[4] == mock.call({
        "event": "cli.checkpoint.run",
        "event_payload": {
            "api_version": "v2"
        },
        "success": True,
    })

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_cli_datasource_profile_with_invalid_data_asset_arg_answering_no(
    caplog, empty_data_context, filesystem_csv_2
):
    empty_data_context.add_datasource(
        "my_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": str(filesystem_csv_2),
            }
        },
    )

    not_so_empty_data_context = empty_data_context

    project_root_dir = not_so_empty_data_context.root_directory

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "datasource",
            "profile",
            "my_datasource",
            "--data-assets",
            "bad-bad-asset",
            "-d",
            project_root_dir,
            "--no-view",
        ],
        input="2\n",
        catch_exceptions=False,
    )

    stdout = result.stdout
    assert (
        "Some of the data assets you specified were not found: bad-bad-asset" in stdout
    )
    assert "Choose how to proceed" in stdout
    assert "Skipping profiling for now." in stdout

    context = DataContext(project_root_dir)
    assert len(context.list_datasources()) == 1

    expectations_store = context.stores["expectations_store"]
    suites = expectations_store.list_keys()
    assert len(suites) == 0
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_new_raises_error_if_checkpoints_directory_is_missing_with_ge_config_v2(
    mock_emit,
    caplog,
    titanic_data_context_stats_enabled_config_version_2,
    titanic_expectation_suite,
):
    context = titanic_data_context_stats_enabled_config_version_2
    root_dir = context.root_directory
    checkpoints_dir = os.path.join(
        root_dir, DataContextConfigDefaults.CHECKPOINTS_BASE_DIRECTORY.value)
    shutil.rmtree(checkpoints_dir)
    assert not os.path.isdir(checkpoints_dir)

    context.save_expectation_suite(titanic_expectation_suite)
    assert context.list_expectation_suite_names() == ["Titanic.warning"]
    mock_emit.reset_mock()

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"checkpoint new passengers Titanic.warning -d {root_dir}",
        input="1\n1\n",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert result.exit_code == 1
    assert (
        'Attempted to access the "checkpoint_store_name" field with a legacy config version (2.0) and no `checkpoints` directory.'
        in stdout)

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call({
            "event_payload": {},
            "event": "data_context.__init__",
            "success": True
        }),
        mock.call({
            "event": "cli.checkpoint.new",
            "event_payload": {
                "api_version": "v2"
            },
            "success": False,
        }),
    ]

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
    )
Example #26
def test_init_on_existing_project_with_datasource_with_no_suite_create_one(
    mock_browser,
    caplog,
    initialized_project,
):
    project_dir = initialized_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)
    uncommitted_dir = os.path.join(ge_dir, "uncommitted")

    data_folder_path = os.path.join(project_dir, "data")
    data_path = os.path.join(project_dir, "data", "Titanic.csv")

    # mangle the setup to remove all traces of any suite
    expectations_dir = os.path.join(ge_dir, "expectations")
    data_docs_dir = os.path.join(uncommitted_dir, "data_docs")
    validations_dir = os.path.join(uncommitted_dir, "validations")

    _delete_and_recreate_dir(expectations_dir)
    _delete_and_recreate_dir(data_docs_dir)
    _delete_and_recreate_dir(validations_dir)

    context = DataContext(ge_dir)
    assert context.list_expectation_suites() == []

    runner = CliRunner(mix_stderr=False)
    with pytest.warns(
        UserWarning, match="Warning. An existing `great_expectations.yml` was found"
    ):
        result = runner.invoke(
            cli,
            ["init", "-d", project_dir],
            input=f"\n2\n{data_path}\nsink_me\n\n\n",
            catch_exceptions=False,
        )
    stdout = result.stdout
    assert result.exit_code == 0
    assert mock_browser.call_count == 1

    assert "Error: invalid input" not in stdout
    assert "Always know what to expect from your data" in stdout
    assert (
        "Enter the path of a data file (relative or absolute, s3a:// and gs:// paths are ok too)"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "The following Data Docs sites will be built" in stdout
    assert "Great Expectations is now set up" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
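# `_delete_and_recreate_dir`, used above to strip the project of suites, docs, and
# validations, is not defined in these examples. The obvious implementation, given
# as an assumed sketch, is:
def _delete_and_recreate_dir_sketch(directory):
    # Wipe the directory and recreate it empty.
    shutil.rmtree(directory)
    os.makedirs(directory)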
def test_cli_datasource_list(empty_data_context, empty_sqlite_db, caplog):
    """Test an empty project and after adding a single datasource."""
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )

    stdout = result.stdout.strip()
    assert "No Datasources found" in stdout
    assert context.list_datasources() == []

    datasource_name = "wow_a_datasource"
    _add_datasource_and_credentials_to_context(
        context, datasource_name, empty_sqlite_db
    )

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )
    url = str(empty_sqlite_db.engine.url)
    expected_output = """\
1 Datasource found:

 - name: wow_a_datasource
   module_name: great_expectations.datasource
   class_name: SqlAlchemyDatasource
   batch_kwargs_generators:
     default:
       class_name: TableBatchKwargsGenerator
   credentials:
     url: {}
   data_asset_type:
     class_name: SqlAlchemyDataset
     module_name: None
""".format(
        url
    ).strip()
    stdout = result.stdout.strip()

    assert stdout == expected_output

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_run_noninteractive_golden_path(
        caplog, data_context_simple_expectation_suite, filesystem_csv_2):
    """
    Non-interactive mode golden path: use the --validation_config_file argument to pass
    the path to a valid validation config file.
    """
    not_so_empty_data_context = data_context_simple_expectation_suite
    root_dir = not_so_empty_data_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    csv_path = os.path.join(filesystem_csv_2, "f1.csv")

    validation_config = {
        "validation_operator_name":
        "default",
        "batches": [{
            "batch_kwargs": {
                "path": csv_path,
                "datasource": "mydatasource",
                "reader_method": "read_csv",
            },
            "expectation_suite_names": ["default"],
        }],
    }
    validation_config_file_path = os.path.join(root_dir, "uncommitted",
                                               "validation_config_1.json")
    with open(validation_config_file_path, "w") as f:
        json.dump(validation_config, f)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "validation-operator",
            "run",
            "-d",
            root_dir,
            "--validation_config_file",
            validation_config_file_path,
        ],
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert "Validation failed" in stdout
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
Example #29
def test_store_list_with_five_stores(caplog, empty_data_context):
    project_dir = empty_data_context.root_directory
    runner = CliRunner(mix_stderr=False)

    expected_result = """\
5 Stores found:

 - name: expectations_store
   class_name: ExpectationsStore
   store_backend:
     class_name: TupleFilesystemStoreBackend
     base_directory: expectations/

 - name: validations_store
   class_name: ValidationsStore
   store_backend:
     class_name: TupleFilesystemStoreBackend
     base_directory: uncommitted/validations/

 - name: evaluation_parameter_store
   class_name: EvaluationParameterStore

 - name: checkpoint_store
   class_name: CheckpointStore
   store_backend:
     class_name: TupleFilesystemStoreBackend
     base_directory: checkpoints/
     suppress_store_backend_id: True

 - name: profiler_store
   class_name: ProfilerStore
   store_backend:
     class_name: TupleFilesystemStoreBackend
     base_directory: profilers/
     suppress_store_backend_id: True"""
    result = runner.invoke(
        cli,
        f"store list -d {project_dir}",
        catch_exceptions=False,
    )
    print(result.output)
    assert result.exit_code == 0
    assert result.output.strip() == expected_result

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_store_list_with_zero_stores(caplog, empty_data_context):
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context._project_config.stores = {}
    context._save_project_config()
    runner = CliRunner(mix_stderr=False)

    result = runner.invoke(
        cli,
        "store list -d {}".format(project_dir),
        catch_exceptions=False,
    )
    assert result.exit_code == 1
    assert (
        "Your configuration file is not a valid yml file likely due to a yml syntax error"
        in result.output.strip())

    assert_no_logging_messages_or_tracebacks(caplog, result)