示例#1
0
def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    flag_build_docs=True,
    open_docs=False,
    profiler_configuration="demo",
    data_asset_name=None,
):
    """
    Create a new expectation suite, either empty or populated by profiling.

    Anything the caller did not supply (batch kwargs, generator, data asset,
    suite name) is resolved through the CLI helpers. The process exits with
    status 1 when no datasource can be selected or when the requested suite
    name is already present in the context.

    WARNING: the flow and name of this method and its interaction with _profile_to_create_a_suite
    require a serious revisiting.

    :param generator_asset: deprecated alias of ``data_asset_name``; when truthy
        it overrides ``data_asset_name`` and a DeprecationWarning is emitted.
    :param empty_suite: when true, create the suite without profiling and
        return before any docs are built.
    :param flag_build_docs: build data docs after profiling.
    :param open_docs: only consulted when ``flag_build_docs`` is true.
    :return: a tuple: (success, suite name, profiling_results); profiling_results
        is None for an empty suite.
    """
    if generator_asset:
        warnings.warn(
            "The 'generator_asset' argument will be deprecated and renamed to 'data_asset_name'. "
            "Please update code accordingly.",
            DeprecationWarning,
        )
        data_asset_name = generator_asset

    if show_intro_message and not empty_suite:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    selected_datasource = select_datasource(context, datasource_name=datasource_name)
    if selected_datasource is None:
        # select_datasource has already reported the problem; nothing left to do but exit.
        sys.exit(1)
    datasource_name = selected_datasource.name

    # Refuse to overwrite a suite that already exists under the requested name.
    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    batch_is_underspecified = any(
        value is None
        for value in (batch_kwargs_generator_name, data_asset_name, batch_kwargs)
    )
    if batch_is_underspecified:
        resolved = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            data_asset_name=data_asset_name,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        (
            datasource_name,
            batch_kwargs_generator_name,
            data_asset_name,
            batch_kwargs,
        ) = resolved
        # The additional_batch_kwargs were "consumed" by get_batch_kwargs above,
        # so they must not be handed to the profiler a second time.
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        suggested_name = _get_default_expectation_suite_name(
            batch_kwargs, data_asset_name
        )

        def _prompt_for_suite_name():
            return click.prompt(
                "\nName the new Expectation Suite",
                default=suggested_name,
            )

        # Keep asking until the user picks a name that is not taken yet.
        expectation_suite_name = _prompt_for_suite_name()
        while expectation_suite_name in context.list_expectation_suite_names():
            tell_user_suite_exists(expectation_suite_name)
            expectation_suite_name = _prompt_for_suite_name()

    if empty_suite:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)
        return True, expectation_suite_name, None

    profiling_results = _profile_to_create_a_suite(
        additional_batch_kwargs,
        batch_kwargs,
        batch_kwargs_generator_name,
        context,
        datasource_name,
        expectation_suite_name,
        data_asset_name,
        profiler_configuration,
    )

    if flag_build_docs:
        build_docs(context, view=False)
        if open_docs:
            attempt_to_open_validation_results_in_data_docs(context, profiling_results)

    return True, expectation_suite_name, profiling_results
示例#2
0
def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs, usage_event):
    """
    Render an "edit" notebook for an existing expectation suite.

    The batch used as sample data is taken, in order of precedence, from the
    ``batch_kwargs`` argument, from the last citation on the suite that carries
    batch_kwargs, or from an interactive prompt.

    :param suite: name of the suite to load from the context.
    :param datasource: optional datasource name; when truthy it overrides the
        "datasource" key of explicitly supplied batch_kwargs and preselects the
        datasource for the interactive flow.
    :param directory: passed to load_data_context_with_error_handling.
    :param jupyter: when true, launch the notebook; otherwise print the command
        that opens it.
    :param batch_kwargs: batch kwargs as a JSON string, or an already-parsed
        mapping (callers such as ``_suite_new`` parse the JSON before calling).
    :param usage_event: event name reported with every usage message.

    Every user-facing failure reports a failed usage event and exits with
    status 1; any other exception is reported as a failure and re-raised.
    """
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    context = load_data_context_with_error_handling(directory)

    def _fail(message):
        # Shared failure path: tell the user, record the failed event, exit.
        cli_message(message)
        send_usage_message(data_context=context, event=usage_event, success=False)
        sys.exit(1)

    try:
        suite = load_expectation_suite(context, suite)
        citations = suite.get_citations(require_batch_kwargs=True)

        if batch_kwargs_json:
            try:
                if isinstance(batch_kwargs_json, (str, bytes, bytearray)):
                    batch_kwargs = json.loads(batch_kwargs_json)
                else:
                    # Already parsed by the caller. json.loads would raise an
                    # uncaught TypeError here; copy so that adding the
                    # "datasource" key below does not mutate the caller's object.
                    batch_kwargs = dict(batch_kwargs_json)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                # Loading the batch is only a validity check; the result is unused.
                toolkit.load_batch(context, suite, batch_kwargs)
            except json_parse_exception as je:
                # Must stay ahead of the ValueError handler below.
                # NOTE(review): presumably json_parse_exception is a ValueError
                # subclass (JSONDecodeError) - confirm.
                _fail(
                    "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                        je
                    )
                )
            except ge_exceptions.DataContextError:
                _fail(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
            except ValueError as ve:
                _fail(
                    "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                        ve
                    )
                )
        elif citations:
            # Fall back to the batch_kwargs recorded in the last citation.
            batch_kwargs = citations[-1].get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                """
A batch of data is required to edit the suite - let's help you to specify it."""
            )

            try:
                data_source = select_datasource(context, datasource_name=datasource)
            except ValueError as ve:
                _fail("<red>{}</red>".format(ve))

            if not data_source:
                _fail("<red>No datasources found in the context.</red>")

            # Only prompt when nothing at all was found; an empty (non-None)
            # value coming from a citation is passed through unchanged.
            if batch_kwargs is None:
                _, _, _, batch_kwargs = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    additional_batch_kwargs=None,
                )

        notebook_name = "edit_{}.ipynb".format(suite.expectation_suite_name)
        notebook_path = _get_notebook_path(context, notebook_name)
        SuiteEditNotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        payload = edit_expectation_suite_usage_statistics(
            data_context=context, expectation_suite_name=suite.expectation_suite_name
        )

        # Report success before launching Jupyter, matching the original ordering.
        send_usage_message(
            data_context=context, event=usage_event, event_payload=payload, success=True
        )

        if jupyter:
            toolkit.launch_jupyter_notebook(notebook_path)

    except Exception:
        # sys.exit() raises SystemExit, which is not an Exception subclass, so
        # the _fail() paths above are not reported a second time here.
        send_usage_message(data_context=context, event=usage_event, success=False)
        raise
示例#3
0
def exit_with_failure_message_and_stats(
    context: DataContext, usage_event: str, message: str
) -> None:
    """
    Show ``message``, report a failed usage event, and exit with status 1.

    :param context: data context handed to send_usage_message.
    :param usage_event: event name reported with ``success=False``.
    :param message: text passed to cli_message before exiting.

    This function does not return normally: it always ends in ``sys.exit(1)``.
    """
    cli_message(message)
    send_usage_message(context, event=usage_event, success=False)
    sys.exit(1)
示例#4
0
def upgrade_project_one_version_increment(
    context_root_dir: str,
    ge_config_version: float,
    continuation_message: str,
    from_cli_upgrade_command: bool = False,
) -> [bool, bool]:  # Returns increment_version, exception_occurred
    """
    Run the upgrade helper registered for ``int(ge_config_version)``, if any.

    The config version on disk is temporarily set to CURRENT_GE_CONFIG_VERSION
    so that a functional DataContext can be obtained. Afterwards it is either
    set to the next integer version (upgrade succeeded and asked for an
    increment) or restored to ``ge_config_version``.

    :param continuation_message: shown when the user declines the upgrade; it is
        also forwarded to confirm_proceed_or_exit.
    :param from_cli_upgrade_command: forces the confirmation prompt even when the
        helper does not require one.
    :return: ``(increment_version, exception_occurred)``. ``(False, False)`` is
        also returned when no helper is registered for this version.

    Exits the process with status 0 if the user declines the upgrade.
    """
    source_version = int(ge_config_version)
    helper_cls = GE_UPGRADE_HELPER_VERSION_MAP.get(source_version)
    if not helper_cls:
        return False, False

    def _restore_original_version():
        # Put back the version number the project had before we touched it.
        DataContext.set_ge_config_version(
            ge_config_version, context_root_dir, validate_config_version=False
        )

    # set version temporarily to CURRENT_GE_CONFIG_VERSION to get functional DataContext
    DataContext.set_ge_config_version(
        config_version=CURRENT_GE_CONFIG_VERSION,
        context_root_dir=context_root_dir,
    )
    helper = helper_cls(context_root_dir=context_root_dir)
    overview, needs_confirmation = helper.get_upgrade_overview()

    proceed = True
    if needs_confirmation or from_cli_upgrade_command:
        proceed = confirm_proceed_or_exit(
            confirm_prompt=overview,
            continuation_message=continuation_message,
            exit_on_no=False,
        )

    if not proceed:
        _restore_original_version()
        cli_message(continuation_message)
        sys.exit(0)

    cli_message("\nUpgrading project...")
    cli_message(SECTION_SEPARATOR)
    # run the upgrade; the helper reports what was done and whether the
    # version number should be incremented
    report, should_increment, had_exception = helper.upgrade_project()
    cli_message(report)

    if had_exception:
        _restore_original_version()
        return False, True

    if should_increment:
        DataContext.set_ge_config_version(
            source_version + 1,
            context_root_dir,
            validate_config_version=False,
        )
        return True, False

    _restore_original_version()
    return False, False
示例#5
0
def _suite_edit(suite, datasource, directory, jupyter, batch_kwargs):
    """
    Render an edit notebook for an existing suite and optionally open it.

    The sample batch comes from ``batch_kwargs`` (a JSON string) when given,
    otherwise from the last citation on the suite that carries batch_kwargs,
    otherwise from an interactive prompt. Usage is reported under the
    "cli.suite.edit" event.

    Returns None without reporting usage when no project config is found in
    ``directory``. User-facing failures report a failed usage event and exit
    with status 1; any other exception is reported and re-raised.
    """
    raw_batch_kwargs = batch_kwargs
    batch_kwargs = None
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message(f"<red>{err.message}</red>")
        return

    def _report_failure():
        send_usage_message(
            data_context=context, event="cli.suite.edit", success=False
        )

    def _abort(message):
        # Tell the user what went wrong, record the failure, stop the process.
        cli_message(message)
        _report_failure()
        sys.exit(1)

    try:
        suite = load_expectation_suite(context, suite)
        citations = suite.get_citations(sort=True, require_batch_kwargs=True)

        if raw_batch_kwargs:
            try:
                batch_kwargs = json.loads(raw_batch_kwargs)
                if datasource:
                    batch_kwargs["datasource"] = datasource
                # Loading the batch verifies that the kwargs are usable.
                _batch = context.get_batch(batch_kwargs, suite.expectation_suite_name)
                assert isinstance(_batch, DataAsset)
            except json_parse_exception as je:
                _abort(
                    f"<red>Please check that your batch_kwargs are valid JSON.\n{je}</red>"
                )
            except ge_exceptions.DataContextError:
                _abort(
                    "<red>Please check that your batch_kwargs are able to load a batch.</red>"
                )
            except ValueError as ve:
                _abort(
                    f"<red>Please check that your batch_kwargs are able to load a batch.\n{ve}</red>"
                )
        elif citations:
            batch_kwargs = citations[-1].get("batch_kwargs")

        if not batch_kwargs:
            cli_message(
                "\nA batch of data is required to edit the suite - let's help you to specify it."
            )

            try:
                data_source = select_datasource(context, datasource_name=datasource)
            except ValueError as ve:
                _abort(f"<red>{ve}</red>")

            if not data_source:
                _abort("<red>No datasources found in the context.</red>")

            # Prompt only when nothing was found at all (None, not merely empty).
            if batch_kwargs is None:
                _, _, _, batch_kwargs = get_batch_kwargs(
                    context,
                    datasource_name=data_source.name,
                    batch_kwargs_generator_name=None,
                    generator_asset=None,
                    additional_batch_kwargs=None,
                )

        notebook_path = os.path.join(
            context.root_directory,
            context.GE_EDIT_NOTEBOOK_DIR,
            f"{suite.expectation_suite_name}.ipynb",
        )
        NotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

        if not jupyter:
            cli_message(
                f"To continue editing this suite, run <green>jupyter notebook {notebook_path}</green>"
            )

        usage_payload = edit_expectation_suite_usage_statistics(
            data_context=context,
            expectation_suite_name=suite.expectation_suite_name,
        )
        send_usage_message(
            data_context=context,
            event="cli.suite.edit",
            event_payload=usage_payload,
            success=True,
        )

        if jupyter:
            subprocess.call(["jupyter", "notebook", notebook_path])

    except Exception as e:
        # SystemExit raised by _abort() is not an Exception, so it passes through.
        _report_failure()
        raise e
示例#6
0
def tell_user_suite_exists(suite_name: str) -> None:
    """Tell the user that ``suite_name`` is already taken and point to `suite edit`."""
    already_exists = (
        f"<red>An expectation suite named `{suite_name}` already exists.</red>"
    )
    edit_hint = (
        "  - If you intend to edit the suite please use "
        f"`great_expectations suite edit {suite_name}`."
    )
    cli_message(already_exists + "\n" + edit_hint)
示例#7
0
def suite_edit(suite, datasource, directory, jupyter, batch_kwargs):
    """
    Generate a Jupyter notebook for editing an existing expectation suite.

    The SUITE argument is required. This is the name you gave to the suite
    when you created it.

    A batch of data is required to edit the suite, which is used as a sample.

    The edit command will help you specify a batch interactively. Or you can
    specify them manually by providing --batch-kwargs in valid JSON format.

    Read more about specifying batches of data in the documentation: https://docs.greatexpectations.io/
    """
    # NOTE(review): the docstring above is kept verbatim because it presumably
    # doubles as the CLI help text - confirm before rewording it.
    batch_kwargs_json = batch_kwargs
    batch_kwargs = None
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return
    except ge_exceptions.ZeroDotSevenConfigVersionError as err:
        # `context` is never bound when the constructor raises, so reading
        # `context.root_directory` here would fail with UnboundLocalError.
        # Pass the directory the caller supplied instead.
        # NOTE(review): confirm the helper copes with `directory` being None.
        _offer_to_install_new_template(err, directory)
        return

    suite = _load_suite(context, suite)
    citations = suite.get_citations(sort=True, require_batch_kwargs=True)

    if batch_kwargs_json:
        # Explicit batch_kwargs win; loading a batch verifies that they work.
        try:
            batch_kwargs = json.loads(batch_kwargs_json)
            if datasource:
                batch_kwargs["datasource"] = datasource
            _batch = context.get_batch(batch_kwargs, suite.expectation_suite_name)
            assert isinstance(_batch, DataAsset)
        except json_parse_exception as je:
            cli_message(
                "<red>Please check that your batch_kwargs are valid JSON.\n{}</red>".format(
                    je
                )
            )
            sys.exit(1)
        except ge_exceptions.DataContextError:
            cli_message(
                "<red>Please check that your batch_kwargs are able to load a batch.</red>"
            )
            sys.exit(1)
        except ValueError as ve:
            cli_message(
                "<red>Please check that your batch_kwargs are able to load a batch.\n{}</red>".format(
                    ve
                )
            )
            sys.exit(1)
    elif citations:
        # Otherwise reuse the batch_kwargs recorded in the last citation.
        citation = citations[-1]
        batch_kwargs = citation.get("batch_kwargs")

    if not batch_kwargs:
        cli_message(
            """
A batch of data is required to edit the suite - let's help you to specify it."""
        )

        try:
            data_source = select_datasource(context, datasource_name=datasource)
        except ValueError as ve:
            cli_message("<red>{}</red>".format(ve))
            sys.exit(1)

        if not data_source:
            cli_message("<red>No datasources found in the context.</red>")
            sys.exit(1)

        # Prompt only when nothing was found at all (None, not merely empty).
        if batch_kwargs is None:
            _, _, _, batch_kwargs = get_batch_kwargs(
                context,
                datasource_name=data_source.name,
                generator_name=None,
                generator_asset=None,
                additional_batch_kwargs=None,
            )

    notebook_name = "{}.ipynb".format(suite.expectation_suite_name)

    notebook_path = os.path.join(
        context.root_directory, context.GE_EDIT_NOTEBOOK_DIR, notebook_name
    )
    NotebookRenderer().render_to_disk(suite, notebook_path, batch_kwargs)

    cli_message(
        "To continue editing this suite, run <green>jupyter notebook {}</green>".format(
            notebook_path
        )
    )

    if jupyter:
        subprocess.call(["jupyter", "notebook", notebook_path])
示例#8
0
def init(target_directory, view):
    """
    Create a new project and help with onboarding.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    # NOTE(review): the docstring above is kept verbatim because it presumably
    # doubles as the CLI help text - confirm before rewording it.
    #
    # The builtin exit() is injected by the `site` module for interactive use
    # and is not guaranteed to exist (e.g. under `python -S`); sys.exit is the
    # supported way to end a program. Imported locally so this block does not
    # rely on a module-level import.
    import sys

    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    ge_yml = os.path.join(ge_dir, DataContext.GE_YML)

    cli_message(GREETING)

    # TODO this should be a property
    if os.path.isfile(ge_yml):
        # Existing project: either confirm it is complete or finish onboarding.
        if DataContext.all_uncommitted_directories_exist(
            ge_dir
        ) and DataContext.config_variables_yml_exist(ge_dir):
            # Ensure the context can be instantiated
            try:
                _ = DataContext(ge_dir)
                cli_message(PROJECT_IS_COMPLETE)
            except ge_exceptions.DataContextError as e:
                cli_message("<red>{}</red>".format(e))
                sys.exit(5)
        else:
            _complete_onboarding(target_directory)

        try:
            # if expectations exist, offer to build docs
            context = DataContext(ge_dir)
            if context.list_expectation_suite_keys():
                if click.confirm(BUILD_DOCS_PROMPT, default=True):
                    context.build_data_docs()
                    context.open_data_docs()
        except ge_exceptions.DataContextError as e:
            # Best effort: a failure here is reported but does not change the exit status.
            cli_message("<red>{}</red>".format(e))
    else:
        # No project file yet: create a new project after confirmation.
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            sys.exit(0)

        context, data_source_name, data_source_type = _create_new_project(
            target_directory)
        if not data_source_name:  # no datasource was created
            return

        profile_datasource(context,
                           data_source_name,
                           open_docs=view,
                           additional_batch_kwargs={"limit": 1000})
        cli_message(
            """\n<cyan>Great Expectations is now set up in your project!</cyan>"""
        )
示例#9
0
def _complete_onboarding(target_dir):
    """
    Run ``DataContext.create`` for ``target_dir`` and announce completion.

    :param target_dir: directory handed to ``DataContext.create``.
    :return: always True.
    """
    DataContext.create(target_dir)
    cli_message(ONBOARDING_COMPLETE)
    return True
示例#10
0
def init(target_directory, view):
    """
    Initialize a new Great Expectations project.

    This guided input walks the user through setting up a new project and also
    onboards a new developer in an existing project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    # NOTE(review): the docstring above is kept verbatim because it presumably
    # doubles as the CLI help text - confirm before rewording it.
    #
    # All exits go through sys.exit: the builtin exit() is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    target_directory = os.path.abspath(target_directory)
    ge_dir = _get_full_path_to_ge_dir(target_directory)
    cli_message(GREETING)

    if DataContext.does_config_exist_on_disk(ge_dir):
        try:
            if DataContext.is_project_initialized(ge_dir):
                # Ensure the context can be instantiated
                cli_message(PROJECT_IS_COMPLETE)
        except (DataContextError, DatasourceInitializationError) as e:
            cli_message("<red>{}</red>".format(e.message))
            sys.exit(1)

        try:
            context = DataContext.create(target_directory)
            cli_message(ONBOARDING_COMPLETE)
            # TODO if this is correct, ensure this is covered by a test
            # cli_message(SETUP_SUCCESS)
            # exit(0)
        except DataContextError as e:
            cli_message("<red>{}</red>".format(e.message))
            # TODO ensure this is covered by a test
            sys.exit(5)
    else:
        if not click.confirm(LETS_BEGIN_PROMPT, default=True):
            cli_message(RUN_INIT_AGAIN)
            # TODO ensure this is covered by a test
            sys.exit(0)

        try:
            context = DataContext.create(target_directory)
        except DataContextError as e:
            # TODO ensure this is covered by a test
            # The error is only reported here; execution continues and the
            # context is re-instantiated in the block below.
            cli_message("<red>{}</red>".format(e))

    try:
        # if expectations exist, offer to build docs
        context = DataContext(ge_dir)
        if context.list_expectation_suites():
            if click.confirm(BUILD_DOCS_PROMPT, default=True):
                build_docs(context, view=view)

        else:
            # No suites yet: make sure a datasource exists, then create a
            # first suite when exactly one datasource is configured.
            datasources = context.list_datasources()
            if len(datasources) == 0:
                datasource_name, data_source_type = add_datasource_impl(
                    context, choose_one_data_asset=True)
                if not datasource_name:  # no datasource was created
                    sys.exit(1)

            datasources = context.list_datasources()
            if len(datasources) == 1:
                datasource_name = datasources[0]["name"]

                success, suite_name = create_expectation_suite_impl(
                    context,
                    datasource_name=datasource_name,
                    show_intro_message=False,
                    additional_batch_kwargs={"limit": 1000},
                    open_docs=view,
                )
                if success:
                    cli_message(
                        "A new Expectation suite '{}' was added to your project"
                        .format(suite_name))

                cli_message(SETUP_SUCCESS)
                sys.exit(0)
    except (DataContextError, ge_exceptions.ProfilerError, IOError,
            SQLAlchemyError) as e:
        cli_message("<red>{}</red>".format(e))
        sys.exit(1)
示例#11
0
def _suite_new(suite: str, directory: str, empty: bool, jupyter: bool,
               view: bool, batch_kwargs, usage_event: str) -> None:
    """
    Create a new expectation suite and hand it straight to ``_suite_edit``.

    :param suite: requested suite name, forwarded as ``expectation_suite_name``.
    :param empty: forwarded as ``empty_suite`` to the suite-creation helper.
    :param jupyter: forwarded to ``_suite_edit``; together with ``empty`` it
        also triggers an explanatory message.
    :param view: forwarded as ``open_docs``.
    :param batch_kwargs: optional JSON string; parsed before use.
    :param usage_event: event name reported with every usage message.

    Known project errors (DataContextError, ProfilerError, IOError,
    SQLAlchemyError) are shown to the user, reported as a failed usage event,
    and end the process with status 1. Any other exception is reported as a
    failure and re-raised.
    """
    # TODO break this up into demo and new
    context = load_data_context_with_error_handling(directory)

    def _send_usage(succeeded):
        send_usage_message(data_context=context,
                           event=usage_event,
                           success=succeeded)

    # Nothing is preselected: the creation helper resolves these itself.
    datasource_name = None

    try:
        if batch_kwargs is not None:
            batch_kwargs = json.loads(batch_kwargs)

        success, suite_name = create_expectation_suite_impl(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=None,
            generator_asset=None,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            show_intro_message=False,
            open_docs=view,
        )
        if not success:
            _send_usage(False)
            return

        cli_message(
            f"A new Expectation suite '{suite_name}' was added to your project")
        if empty and jupyter:
            cli_message(
                "<green>Because you requested an empty suite, we'll open a notebook for you now to edit it!\n"
                "If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n"
            )
        _suite_edit(
            suite_name,
            datasource_name,
            directory,
            jupyter=jupyter,
            batch_kwargs=batch_kwargs,
            usage_event=usage_event,
        )
        _send_usage(True)
    except (
            ge_exceptions.DataContextError,
            ge_exceptions.ProfilerError,
            IOError,
            SQLAlchemyError,
    ) as known_error:
        cli_message(f"<red>{known_error}</red>")
        _send_usage(False)
        sys.exit(1)
    except Exception:
        _send_usage(False)
        raise
示例#12
0
def upgrade_project(context_root_dir,
                    ge_config_version,
                    from_cli_upgrade_command=False):
    """
    Walk the user through upgrading a project with an out-of-date config version.

    After the user agrees to the upgrade prompt, the upgrade helper registered
    for each integer version is run in turn until the version reaches
    MINIMUM_SUPPORTED_CONFIG_VERSION, no helper is registered, or a helper
    reports that the version must not be incremented.

    :param context_root_dir: project root handed to the helpers and to
        DataContext.set_ge_config_version.
    :param ge_config_version: the project's current config version.
    :param from_cli_upgrade_command: when true, the opening message omits the
        "In order to proceed, your project must be upgraded." sentence.

    This function always ends the process with exit status 0.
    """
    docs_url = "https://docs.greatexpectations.io/en/latest/how_to_guides/migrating_versions.html"
    continuation_message = (
        "\nOk, exiting now. To upgrade at a later time, use the following command: "
        "<cyan>great_expectations project upgrade</cyan>\n\n"
        f"To learn more about the upgrade process, visit <cyan>{docs_url}</cyan>.\n"
    )

    # Both variants share the same opening; only the non-CLI variant adds the
    # "must be upgraded" sentence.
    out_of_date_notice = (
        f"<red>\nYour project appears to have an out-of-date config version ({ge_config_version}) - "
        f"the version number must be at least {MINIMUM_SUPPORTED_CONFIG_VERSION}."
    )
    if not from_cli_upgrade_command:
        out_of_date_notice += "\nIn order to proceed, your project must be upgraded."
    cli_message(out_of_date_notice + "</red>")

    confirm_proceed_or_exit(
        confirm_prompt="\nWould you like to run the Upgrade Helper to bring your project up-to-date?",
        continuation_message=continuation_message,
    )
    cli_message(SECTION_SEPARATOR)

    def _restore_current_version():
        # Write back the version the project currently stands at.
        DataContext.set_ge_config_version(ge_config_version,
                                          context_root_dir,
                                          validate_config_version=False)

    # use loop in case multiple upgrades need to take place
    while ge_config_version < MINIMUM_SUPPORTED_CONFIG_VERSION:
        source_version = int(ge_config_version)
        helper_cls = GE_UPGRADE_HELPER_VERSION_MAP.get(source_version)
        if not helper_cls:
            break

        # set version temporarily to MINIMUM_SUPPORTED_CONFIG_VERSION to get functional DataContext
        DataContext.set_ge_config_version(
            config_version=MINIMUM_SUPPORTED_CONFIG_VERSION,
            context_root_dir=context_root_dir,
        )
        helper = helper_cls(context_root_dir=context_root_dir)
        overview, needs_confirmation = helper.get_upgrade_overview()

        proceed = True
        if needs_confirmation:
            proceed = confirm_proceed_or_exit(
                confirm_prompt=overview,
                continuation_message=continuation_message,
                exit_on_no=False,
            )

        if not proceed:
            _restore_current_version()
            cli_message(continuation_message)
            sys.exit(0)

        cli_message("\nUpgrading project...")
        cli_message(SECTION_SEPARATOR)
        # run the upgrade; the helper reports what was done and whether the
        # version number should be incremented
        report, should_increment = helper.upgrade_project()
        cli_message(report)

        if not should_increment:
            _restore_current_version()
            break

        # set config version to target version
        DataContext.set_ge_config_version(
            source_version + 1,
            context_root_dir,
            validate_config_version=False,
        )
        ge_config_version += 1

    cli_message(SECTION_SEPARATOR)

    next_version = ge_config_version + 1
    upgrade_incomplete_message = (
        "<red>The Upgrade Helper was unable to perform a complete project upgrade. Next steps:</red>\n"
        "\n"
        "    - Please perform any manual steps outlined in the Upgrade Overview and/or Upgrade Report above\n"
        "    - When complete, increment the config_version key in your <cyan>great_expectations.yml</cyan> to "
        f"<cyan>{next_version}</cyan>\n"
        "\n"
        f"To learn more about the upgrade process, visit <cyan>{docs_url}</cyan>\n"
    )

    if ge_config_version < MINIMUM_SUPPORTED_CONFIG_VERSION:
        cli_message(upgrade_incomplete_message)
    else:
        cli_message("<green>Upgrade complete. Exiting...</green>\n")
    sys.exit(0)
示例#13
0
def _suite_new(
    suite: str,
    directory: str,
    empty: bool,
    jupyter: bool,
    view: bool,
    batch_kwargs,
    usage_event: str,
) -> None:
    """Create an expectation suite, then hand over to the suite-edit flow.

    ``batch_kwargs``, when not None, is decoded with ``json.loads`` before it
    is used. A usage message for ``usage_event`` is sent on every path, with
    ``success`` reflecting whether suite creation succeeded.

    DataContextError, ProfilerError, IOError and SQLAlchemyError are printed
    through ``cli_message`` and end the process with exit status 1; any other
    exception is re-raised after the failed usage message has been sent.
    """
    # TODO break this up into demo and new
    context = toolkit.load_data_context_with_error_handling(directory)

    # No datasource, generator or data asset is pre-selected here; all three
    # are handed to the toolkit as None.
    datasource_name = None

    def report_usage(succeeded):
        send_usage_message(data_context=context,
                           event=usage_event,
                           success=succeeded)

    try:
        if batch_kwargs is not None:
            batch_kwargs = json.loads(batch_kwargs)

        created, suite_name, _profiling_results = toolkit.create_expectation_suite(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=None,
            data_asset_name=None,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            show_intro_message=False,
            open_docs=view,
        )

        if not created:
            report_usage(False)
            return

        if empty and jupyter:
            cli_message(
                """<green>Because you requested an empty suite, we'll open a notebook for you now to edit it!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n""")
        report_usage(True)

        _suite_edit(
            suite_name,
            datasource_name,
            directory,
            jupyter=jupyter,
            batch_kwargs=batch_kwargs,
            # Use the edit event name; otherwise `cli.suite.new` would be
            # reported for the edit step, which is incorrect.
            usage_event="cli.suite.edit",
            # This call is not the result of the user invoking edit, so no
            # usage message should actually be sent for it.
            suppress_usage_message=True,
        )
    except (
            ge_exceptions.DataContextError,
            ge_exceptions.ProfilerError,
            IOError,
            SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        report_usage(False)
        sys.exit(1)
    except Exception as e:
        report_usage(False)
        raise e
示例#14
0
def _validate_tap_filename(tap_filename):
    if not tap_filename.endswith(".py"):
        cli_message(
            "<red>Tap filename must end in .py. Please correct and re-run</red>"
        )
        exit(1)
示例#15
0
def create_expectation_suite(
    context,
    datasource_name=None,
    batch_kwargs_generator_name=None,
    generator_asset=None,
    batch_kwargs=None,
    expectation_suite_name=None,
    additional_batch_kwargs=None,
    empty_suite=False,
    show_intro_message=False,
    open_docs=False,
    profiler_configuration="demo",
):
    """
    Build a new expectation suite, asking the user for whatever was not supplied.

    The process exits with status 1 when no datasource can be selected or when
    ``expectation_suite_name`` is already present in the context. If the name is
    not given, the user is prompted until an unused name is entered. With
    ``empty_suite`` an empty suite is created; otherwise the data is profiled,
    docs are built, and (when ``open_docs`` is set) an attempt is made to open
    the validation results.

    :return: a tuple: (success, suite name)
    """
    wants_intro = show_intro_message and not empty_suite
    if wants_intro:
        cli_message(
            "\n<cyan>========== Create sample Expectations ==========</cyan>\n\n"
        )

    chosen_datasource = select_datasource(context, datasource_name=datasource_name)
    if chosen_datasource is None:
        # select_datasource takes care of displaying an error message, so all is left here is to exit.
        sys.exit(1)
    datasource_name = chosen_datasource.name

    if expectation_suite_name in context.list_expectation_suite_names():
        tell_user_suite_exists(expectation_suite_name)
        sys.exit(1)

    batch_is_underspecified = any(
        item is None
        for item in (batch_kwargs_generator_name, generator_asset, batch_kwargs)
    )
    if batch_is_underspecified:
        resolved = get_batch_kwargs(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=batch_kwargs_generator_name,
            generator_asset=generator_asset,
            additional_batch_kwargs=additional_batch_kwargs,
        )
        (
            datasource_name,
            batch_kwargs_generator_name,
            generator_asset,
            batch_kwargs,
        ) = resolved
        # In this case, we have "consumed" the additional_batch_kwargs
        additional_batch_kwargs = {}

    if expectation_suite_name is None:
        suggested_name = _get_default_expectation_suite_name(
            batch_kwargs, generator_asset)
        # Keep asking until the entered name is not already in use.
        while True:
            expectation_suite_name = click.prompt(
                "\nName the new expectation suite",
                default=suggested_name,
                show_default=True,
            )
            if expectation_suite_name not in context.list_expectation_suite_names():
                break
            tell_user_suite_exists(expectation_suite_name)

    if not empty_suite:
        profiling_results = _profile_to_create_a_suite(
            additional_batch_kwargs,
            batch_kwargs,
            batch_kwargs_generator_name,
            context,
            datasource_name,
            expectation_suite_name,
            generator_asset,
            profiler_configuration,
        )

        build_docs(context, view=False)
        if open_docs:
            _attempt_to_open_validation_results_in_data_docs(
                context, profiling_results)
    else:
        create_empty_suite(context, expectation_suite_name, batch_kwargs)

    return True, expectation_suite_name
示例#16
0
def _get_datasource(context, datasource):
    """Return the datasource picked by ``toolkit.select_datasource``.

    When the selection is falsy, an error is printed through ``cli_message``
    and the process exits with status 1.
    """
    selected = toolkit.select_datasource(context, datasource_name=datasource)
    if selected:
        return selected
    cli_message("<red>No datasources found in the context.</red>")
    sys.exit(1)
def validation_operator_run(name, run_id, validation_config_file, suite, directory):
    # Note though the long lines here aren't pythonic, they look best if Click does the line wraps.
    """
    Run a validation operator against some data.

    There are two modes to run this command:

    1. Interactive (good for development):

        Specify the name of the validation operator using the --name argument
        and the name of the expectation suite using the --suite argument.

        The cli will help you specify the batch of data that you want to
        validate interactively.


    2. Non-interactive (good for production):

        Use the `--validation_config_file` argument to specify the path of the validation configuration JSON file. This file can be used to instruct a validation operator to validate multiple batches of data and use multiple expectation suites to validate each batch.

        Learn how to create a validation config file here:
        https://great-expectations.readthedocs.io/en/latest/command_line.html#great-expectations-validation-operator-run-validation-config-file-validation-config-file-path

        This command exits with 0 if the validation operator ran and the "success" attribute in its return object is True. Otherwise, the command exits with 1.

    To learn more about validation operators, go here:
    https://great-expectations.readthedocs.io/en/latest/features/validation.html#validation-operators
    """
    # Parameters, as used below:
    #   name: validation operator name; required in interactive mode.
    #   run_id: run identifier; a UTC timestamp string is generated when None.
    #   validation_config_file: path to a JSON validation config. When given,
    #       the command runs non-interactively and `name` / `suite` are not read.
    #   suite: expectation suite name; required in interactive mode.
    #   directory: handed to DataContext to locate the project.
    # Exit status is 0 only when the operator ran and reported success, and 1
    # on every handled failure. Unexpected exceptions are re-raised after a
    # failed usage event has been sent.
    usage_event = "cli.validation_operator.run"

    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("Failed to process <red>{}</red>".format(err.message))
        sys.exit(1)

    def _abort(message):
        # Print the message, send a failed usage event, then exit with status 1.
        cli_message(message)
        send_usage_message(
            data_context=context,
            event=usage_event,
            success=False
        )
        sys.exit(1)

    try:
        interactive = validation_config_file is None

        if not interactive:
            try:
                with open(validation_config_file) as f:
                    validation_config = json.load(f)
            except (
                IOError,
                json_parse_exception
            ) as e:
                _abort(f"Failed to process the --validation_config_file argument: <red>{e}</red>")

            validation_config_error_message = _validate_valdiation_config(validation_config)
            if validation_config_error_message is not None:
                _abort("<red>The validation config in {0:s} is misconfigured: {1:s}</red>".format(validation_config_file, validation_config_error_message))

        else:
            if suite is None:
                # A missing --suite is a failure like any other: exit 1, in
                # line with the failed usage event and the docstring (this
                # path previously exited with 0).
                _abort(
"""
Please use --suite argument to specify the name of the expectation suite.
Call `great_expectations suite list` command to list the expectation suites in your project.
"""
                )

            suite = load_expectation_suite(context, suite)

            if name is None:
                _abort(
"""
Please use --name argument to specify the name of the validation operator.
Call `great_expectations validation-operator list` command to list the operators in your project.
"""
                )

            if name not in context.list_validation_operator_names():
                _abort(
                f"""
Could not find a validation operator {name}.
Call `great_expectations validation-operator list` command to list the operators in your project.
"""
                )

            cli_message(
            """
Let's help you specify the batch of data your want the validation operator to validate."""
            )

            try:
                data_source = select_datasource(context)
            except ValueError as ve:
                _abort("<red>{}</red>".format(ve))

            if not data_source:
                _abort("<red>No datasources found in the context.</red>")

            # Only the batch kwargs are needed from the 4-tuple returned here.
            _, _, _, batch_kwargs = get_batch_kwargs(context, datasource_name=data_source.name)

            validation_config = {
                "validation_operator_name": name,
                "batches": [
                    {
                        "batch_kwargs": batch_kwargs,
                        "expectation_suite_names": [suite.expectation_suite_name]
                    }
                ]
            }

        try:
            validation_operator_name = validation_config["validation_operator_name"]
            # One batch per (batch_kwargs, suite name) pair listed in the config.
            batches_to_validate = [
                context.get_batch(entry["batch_kwargs"], expectation_suite_name)
                for entry in validation_config["batches"]
                for expectation_suite_name in entry["expectation_suite_names"]
            ]

            if run_id is None:
                run_id = datetime.utcnow().strftime("%Y%m%dT%H%M%S.%fZ")

            run_kwargs = {
                "assets_to_validate": batches_to_validate,
                "run_id": run_id,
            }
            # `suite` is a loaded suite object only in interactive mode. With a
            # config file it is whatever was passed on the command line (None
            # or a plain string), so its attributes must not be touched.
            evaluation_parameters = suite.evaluation_parameters if interactive else None
            if evaluation_parameters is not None:
                run_kwargs["evaluation_parameters"] = evaluation_parameters

            results = context.run_validation_operator(
                validation_operator_name,
                **run_kwargs
            )
        except (
            ge_exceptions.DataContextError,
            IOError,
            SQLAlchemyError,
        ) as e:
            _abort("<red>{}</red>".format(e))

        # The command itself ran, so usage is reported as successful either
        # way; only the exit status reflects the validation outcome.
        validation_passed = bool(results["success"])
        cli_message("Validation Succeeded!" if validation_passed else "Validation Failed!")
        send_usage_message(
            data_context=context,
            event=usage_event,
            success=True
        )
        sys.exit(0 if validation_passed else 1)
    except Exception:
        send_usage_message(
            data_context=context,
            event=usage_event,
            success=False
        )
        raise
示例#18
0
def _complete_onboarding(target_dir):
    """Ask for confirmation and, if given, create the DataContext in ``target_dir``.

    The prompt defaults to yes. On confirmation ``DataContext.create`` is
    called and ONBOARDING_COMPLETE is printed; otherwise RUN_INIT_AGAIN is
    printed and nothing is created.
    """
    confirmed = click.confirm(COMPLETE_ONBOARDING_PROMPT, default=True)
    if not confirmed:
        cli_message(RUN_INIT_AGAIN)
        return
    DataContext.create(target_dir)
    cli_message(ONBOARDING_COMPLETE)
示例#19
0
def suite_new(suite, directory, empty, jupyter, view, batch_kwargs):
    """
    Create a new Expectation Suite.

    Great Expectations will choose a couple of columns and generate expectations about them
    to demonstrate some examples of assertions you can make about your data.

    If you wish to skip the examples, add the `--empty` flag.
    """
    # Without a project config there is nothing to do: report it and return.
    try:
        context = DataContext(directory)
    except ge_exceptions.ConfigNotFoundError as err:
        cli_message("<red>{}</red>".format(err.message))
        return

    def record_usage(succeeded):
        send_usage_message(
            data_context=context,
            event="cli.suite.new",
            success=succeeded
        )

    # No datasource, generator or asset is pre-selected; all are passed as None.
    datasource_name = None

    try:
        if batch_kwargs is not None:
            batch_kwargs = json.loads(batch_kwargs)

        created, suite_name = create_expectation_suite_impl(
            context,
            datasource_name=datasource_name,
            batch_kwargs_generator_name=None,
            generator_asset=None,
            batch_kwargs=batch_kwargs,
            expectation_suite_name=suite,
            additional_batch_kwargs={"limit": 1000},
            empty_suite=empty,
            show_intro_message=False,
            open_docs=view,
        )

        if not created:
            record_usage(False)
            return

        cli_message(
            "A new Expectation suite '{}' was added to your project".format(
                suite_name
            )
        )
        # Only an empty suite is handed to the edit flow from here.
        if empty:
            if jupyter:
                cli_message("""<green>Because you requested an empty suite, we'll open a notebook for you now to edit it!
If you wish to avoid this you can add the `--no-jupyter` flag.</green>\n\n""")
            _suite_edit(suite_name, datasource_name, directory, jupyter=jupyter, batch_kwargs=batch_kwargs)
        record_usage(True)
    except (
        ge_exceptions.DataContextError,
        ge_exceptions.ProfilerError,
        IOError,
        SQLAlchemyError,
    ) as e:
        cli_message("<red>{}</red>".format(e))
        record_usage(False)
        sys.exit(1)
    except Exception as e:
        record_usage(False)
        raise e