def test_data_context_create_does_nothing_if_all_uncommitted_dirs_exist(
        tmp_path_factory):
    expected = """\
great_expectations/
    .gitignore
    great_expectations.yml
    datasources/
    expectations/
    notebooks/
        create_expectations.ipynb
        integrate_validation_into_pipeline.ipynb
    plugins/
    uncommitted/
        config_variables.yml
        data_docs/
        samples/
        validations/
"""
    project_path = str(tmp_path_factory.mktemp('stuff'))
    ge_dir = os.path.join(project_path, "great_expectations")

    DataContext.create(project_path)
    fixture = gen_directory_tree_str(ge_dir)
    assert fixture == expected

    # re-run create to simulate onboarding
    DataContext.create(project_path)

    obs = gen_directory_tree_str(ge_dir)
    assert obs == expected
def test_existing_local_data_docs_urls_returns_single_url_from_customized_local_site(
        tmp_path_factory):
    empty_directory = str(tmp_path_factory.mktemp("yo_yo"))
    DataContext.create(empty_directory)
    ge_dir = os.path.join(empty_directory, DataContext.GE_DIR)
    context = DataContext(ge_dir)

    context._project_config["data_docs_sites"] = {
        "my_rad_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "uncommitted/data_docs/some/local/path/"
            }
        }
    }

    # TODO: Work around statefulness issues with programmatic project config
    #  manipulation by writing the config to disk and re-instantiating the context
    context._save_project_config()
    context = DataContext(ge_dir)
    context.build_data_docs()

    expected_path = os.path.join(
        ge_dir, "uncommitted/data_docs/some/local/path/index.html")
    assert os.path.isfile(expected_path)

    obs = context.get_docs_sites_urls()
    assert obs == ["file://{}".format(expected_path)]
def test_data_context_do_all_uncommitted_dirs_exist(tmp_path_factory):
    expected = """\
uncommitted/
    config_variables.yml
    data_docs/
    samples/
    validations/
"""
    project_path = str(tmp_path_factory.mktemp('stuff'))
    ge_dir = os.path.join(project_path, "great_expectations")
    uncommitted_dir = os.path.join(ge_dir, "uncommitted")
    DataContext.create(project_path)
    fixture = gen_directory_tree_str(uncommitted_dir)
    print(fixture)
    assert fixture == expected

    # Test that all exist
    assert DataContext.all_uncommitted_directories_exist(ge_dir)

    # remove a few
    shutil.rmtree(os.path.join(uncommitted_dir, "data_docs"))
    shutil.rmtree(os.path.join(uncommitted_dir, "validations"))

    # Test that not all exist
    assert not DataContext.all_uncommitted_directories_exist(ge_dir)
@pytest.fixture
def empty_context(tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp('data_context'))
    DataContext.create(project_path)
    ge_dir = os.path.join(project_path, "great_expectations")
    assert os.path.isdir(ge_dir)
    assert os.path.isfile(os.path.join(ge_dir, DataContext.GE_YML))
    context = DataContext(ge_dir)
    assert isinstance(context, DataContext)
    return context
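# A minimal usage sketch for the empty_context fixture above; the test name and
# assertions are illustrative assumptions, not taken from the original suite.
def test_empty_context_points_at_scaffolded_project(empty_context):
    # DataContext.create scaffolds <project>/great_expectations and the context
    # exposes that directory as root_directory (see the init examples below).
    assert empty_context.root_directory.endswith("great_expectations")
    assert os.path.isfile(
        os.path.join(empty_context.root_directory, DataContext.GE_YML))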
def test_existing_local_data_docs_urls_returns_url_on_project_with_no_datasources_and_a_site_configured(tmp_path_factory):
    """
    This test ensures that a URL is returned for the default site even if no
    datasource is configured and docs have not been built.
    """
    empty_directory = str(tmp_path_factory.mktemp("another_empty_project"))
    DataContext.create(empty_directory)
    context = DataContext(os.path.join(empty_directory, DataContext.GE_DIR))

    obs = context.get_docs_sites_urls()
    assert len(obs) == 1
    assert obs[0].endswith("great_expectations/uncommitted/data_docs/local_site/index.html")
def test_data_context_create_raises_warning_and_leaves_existing_yml_untouched(
        tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp('data_context'))
    DataContext.create(project_path)
    ge_yml = os.path.join(project_path,
                          "great_expectations/great_expectations.yml")
    with open(ge_yml, "a") as ff:
        ff.write("# LOOK I WAS MODIFIED")

    with pytest.warns(UserWarning):
        DataContext.create(project_path)

    with open(ge_yml, "r") as ff:
        obs = ff.read()
    assert "# LOOK I WAS MODIFIED" in obs
Example #7
def init(target_directory):
    """Initialize a new Great Expectations project.

    This guided input walks the user through setting up a project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    try:
        context = DataContext.create(target_directory)
    except DataContextError as err:
        logger.critical(err.message)
        sys.exit(-1)

    base_dir = context.root_directory

    six.print_(
        colored(figlet_format("Great Expectations", font="big"), color="cyan"))

    cli_message(greeting_1)

    if not click.confirm(msg_prompt_lets_begin, default=True):
        cli_message(
            "OK - run great_expectations init again when ready. Exiting...")
        exit(0)

    scaffold_directories_and_notebooks(base_dir)
    cli_message("\nDone.", )

    add_datasource(context)
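# Hypothetical wiring sketch: how a helper like init(target_directory) above might
# be exposed as a command-line entry point with click. The group name, the command
# name, and the argument default are assumptions for illustration only.
import click

@click.group()
def cli():
    """Great Expectations CLI entry point (assumed for this sketch)."""

@cli.command(name="init")
@click.argument("target_directory", default="./", type=click.Path())
def init_command(target_directory):
    # Delegate to the init() helper defined above.
    init(target_directory)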
def test_data_context_create_makes_uncommitted_dirs_when_all_are_missing(
        tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp('data_context'))
    DataContext.create(project_path)

    # mangle the existing setup
    ge_dir = os.path.join(project_path, "great_expectations")
    uncommitted_dir = os.path.join(ge_dir, "uncommitted")
    shutil.rmtree(uncommitted_dir)

    # re-run create to simulate onboarding
    DataContext.create(project_path)
    obs = gen_directory_tree_str(ge_dir)

    assert os.path.isdir(uncommitted_dir), "No uncommitted directory created"
    assert obs == """\
Example #9
def _create_new_project(target_directory):
    try:
        context = DataContext.create(target_directory)
        data_source_name, data_source_type = add_datasource_impl(context)
        return context, data_source_name, data_source_type
    except ge_exceptions.DataContextError as err:
        logger.critical(err.message)
        sys.exit(-1)
Example #10
def test_existing_local_data_docs_urls_returns_multiple_urls_from_customized_local_site(
        tmp_path_factory):
    empty_directory = str(tmp_path_factory.mktemp("yo_yo_ma"))
    DataContext.create(empty_directory)
    ge_dir = os.path.join(empty_directory, DataContext.GE_DIR)
    context = DataContext(ge_dir)

    context._project_config["data_docs_sites"] = {
        "my_rad_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "uncommitted/data_docs/some/path/"
            }
        },
        "another_just_amazing_site": {
            "class_name": "SiteBuilder",
            "store_backend": {
                "class_name": "TupleFilesystemStoreBackend",
                "base_directory": "uncommitted/data_docs/another/path/"
            }
        }
    }

    # TODO: Work around statefulness issues with programmatic project config
    #  manipulation by writing the config to disk and re-instantiating the context
    context._save_project_config()
    context = DataContext(ge_dir)
    context.build_data_docs()
    data_docs_dir = os.path.join(ge_dir, "uncommitted/data_docs/")

    path_1 = os.path.join(data_docs_dir, "some/path/index.html")
    path_2 = os.path.join(data_docs_dir, "another/path/index.html")
    for expected_path in [path_1, path_2]:
        assert os.path.isfile(expected_path)

    obs = context.get_docs_sites_urls()

    assert obs == [{
        'site_name': 'my_rad_site',
        'site_url': "file://{}".format(path_1)
    }, {
        'site_name': 'another_just_amazing_site',
        'site_url': "file://{}".format(path_2)
    }]
def test_data_context_create_does_not_overwrite_existing_config_variables_yml(tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp('data_context'))
    DataContext.create(project_path)
    ge_dir = os.path.join(project_path, "great_expectations")
    uncommitted_dir = os.path.join(ge_dir, "uncommitted")
    config_vars_yml = os.path.join(uncommitted_dir, "config_variables.yml")

    # modify config variables
    with open(config_vars_yml, "a") as ff:
        ff.write("# LOOK I WAS MODIFIED")

    # re-run create to simulate onboarding
    with pytest.warns(UserWarning):
        DataContext.create(project_path)

    with open(config_vars_yml, "r") as ff:
        obs = ff.read()
    print(obs)
    assert "# LOOK I WAS MODIFIED" in obs
Example #12
def test_data_context_create_does_nothing_if_all_uncommitted_dirs_exist(
        tmp_path_factory):
    expected = """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
        validations/
"""
    project_path = str(tmp_path_factory.mktemp('stuff'))
    ge_dir = os.path.join(project_path, "great_expectations")

    DataContext.create(project_path)
    fixture = gen_directory_tree_str(ge_dir)

    assert fixture == expected

    with pytest.warns(
            UserWarning,
            match="Warning. An existing `great_expectations.yml` was found"):
        # re-run create to simulate onboarding
        DataContext.create(project_path)

    obs = gen_directory_tree_str(ge_dir)
    assert obs == expected
Example #13
def test_data_context_create_builds_base_directories(tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp("data_context"))
    context = DataContext.create(project_path)
    assert isinstance(context, DataContext)

    for directory in [
            "expectations",
            "notebooks",
            "plugins",
            "checkpoints",
            "uncommitted",
    ]:
        base_dir = os.path.join(project_path, context.GE_DIR, directory)
        assert os.path.isdir(base_dir)
def test_render_full_static_site_from_empty_project(tmp_path_factory,
                                                    filesystem_csv_3):

    # TODO : Use a standard test fixture
    # TODO : Have that test fixture copy a directory, rather than building a new one from scratch

    base_dir = str(tmp_path_factory.mktemp("project_dir"))
    project_dir = os.path.join(base_dir, "project_path")
    os.mkdir(project_dir)

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))

    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
"""

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    context.add_datasource("titanic",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           base_directory=os.path.join(project_dir,
                                                       "data/titanic/"))

    context.add_datasource("random",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           base_directory=os.path.join(project_dir,
                                                       "data/random/"))

    context.profile_datasource("titanic")
    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
    great_expectations/
        .gitignore
        great_expectations.yml
        datasources/
        expectations/
            titanic/
                default/
                    Titanic/
                        BasicDatasetProfiler.json
        notebooks/
            create_expectations.ipynb
            integrate_validation_into_pipeline.ipynb
        plugins/
        uncommitted/
            config_variables.yml
            data_docs/
            samples/
            validations/
                profiling/
                    titanic/
                        default/
                            Titanic/
                                BasicDatasetProfiler.json
"""

    context.profile_datasource("random")
    context.build_data_docs()

    data_docs_dir = os.path.join(project_dir,
                                 "great_expectations/uncommitted/data_docs")
    observed = gen_directory_tree_str(data_docs_dir)
    print(observed)
    assert observed == """\
data_docs/
    local_site/
        index.html
        expectations/
            random/
                default/
                    f1/
                        BasicDatasetProfiler.html
                    f2/
                        BasicDatasetProfiler.html
            titanic/
                default/
                    Titanic/
                        BasicDatasetProfiler.html
        validations/
            profiling/
                random/
                    default/
                        f1/
                            BasicDatasetProfiler.html
                        f2/
                            BasicDatasetProfiler.html
                titanic/
                    default/
                        Titanic/
                            BasicDatasetProfiler.html
"""

    # save data_docs locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/data_docs")

    if os.path.isdir("./tests/data_context/output/data_docs"):
        shutil.rmtree("./tests/data_context/output/data_docs")
    shutil.copytree(os.path.join(ge_directory, "uncommitted/data_docs/"),
                    "./tests/data_context/output/data_docs")
Example #15
def init(target_directory):
    """Initialize a new Great Expectations project.

    This guided input walks the user through setting up a project.

    It scaffolds directories, sets up notebooks, creates a project file, and
    appends to a `.gitignore` file.
    """
    six.print_(
        colored(figlet_format("Great Expectations", font="big"), color="cyan"))

    cli_message(greeting_1)

    if not click.confirm(msg_prompt_lets_begin, default=True):
        cli_message(
            "OK - run great_expectations init again when ready. Exiting...")
        exit(0)

    try:
        context = DataContext.create(target_directory)
    except DataContextError as err:
        logger.critical(err.message)
        sys.exit(-1)

    base_dir = context.root_directory
    scaffold_directories_and_notebooks(base_dir)
    cli_message("\nDone.", )

    data_source_name = add_datasource(context)
    cli_message("""
========== Profiling ==========

Would you like to profile '{0:s}' to create candidate expectations and documentation?

Please note: Profiling is still a beta feature in Great Expectations.  The current profiler will evaluate the entire 
data source (without sampling), which may be very time consuming. 
As a rule of thumb, we recommend starting with data smaller than 100MB.

To learn more about profiling, visit <blue>https://docs.greatexpectations.io/en/latest/guides/profiling.html\
?utm_source=cli&utm_medium=init&utm_campaign={1:s}</blue>.
        """.format(data_source_name, __version__.replace(".", "_")))
    if click.confirm("Proceed?", default=True):
        profiling_results = profile_datasource(context, data_source_name)
        cli_message("""
========== Data Documentation ==========

To generate documentation from the data you just profiled, the profiling results should be moved from 
great_expectations/uncommitted (ignored by git) to great_expectations/fixtures.

Before committing, please make sure that this data does not contain sensitive information!

To learn more: <blue>https://docs.greatexpectations.io/en/latest/guides/data_documentation.html\
?utm_source=cli&utm_medium=init&utm_campaign={0:s}</blue>
""".format(__version__.replace(".", "_")))
        if click.confirm(
                "Move the profiled data and build HTML documentation?",
                default=True):
            cli_message("\nMoving files...")

            for profiling_result in profiling_results:
                data_asset_name = profiling_result[1]['meta'][
                    'data_asset_name']
                expectation_suite_name = profiling_result[1]['meta'][
                    'expectation_suite_name']
                run_id = profiling_result[1]['meta']['run_id']
                context.move_validation_to_fixtures(data_asset_name,
                                                    expectation_suite_name,
                                                    run_id)

            cli_message("\nDone.")

            cli_message("\nBuilding documentation...")
            build_documentation(context)

        else:
            cli_message("Okay, skipping HTML documentation for now.`.")

    else:
        cli_message("Okay, skipping profiling for now. You can always do this "
                    "later by running `great_expectations profile`.")

    cli_message(msg_go_to_notebook)
def test_data_context_create_does_not_raise_error_or_warning_if_ge_dir_exists(
        tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp('data_context'))
    DataContext.create(project_path)
def test_render_full_static_site_from_empty_project(tmp_path_factory,
                                                    filesystem_csv_3):

    # TODO : Use a standard test fixture
    # TODO : Have that test fixture copy a directory, rather than building a new one from scratch

    base_dir = str(tmp_path_factory.mktemp("project_dir"))
    project_dir = os.path.join(base_dir, "project_path")
    os.mkdir(project_dir)

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy(file_relative_path(__file__, "../test_sets/Titanic.csv"),
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))

    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
"""

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    context.add_datasource("titanic",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           generators={
                               "subdir_reader": {
                                   "class_name":
                                   "SubdirReaderBatchKwargsGenerator",
                                   "base_directory":
                                   os.path.join(project_dir, "data/titanic/")
                               }
                           })

    context.add_datasource("random",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           generators={
                               "subdir_reader": {
                                   "class_name":
                                   "SubdirReaderBatchKwargsGenerator",
                                   "base_directory":
                                   os.path.join(project_dir, "data/random/")
                               }
                           })

    context.profile_datasource("titanic")

    # Replicate the batch id of the batch that will be profiled in order to generate the file path of the
    # validation result
    titanic_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/titanic/Titanic.csv'),
        'datasource': 'titanic'
    }).to_id()

    tree_str = gen_directory_tree_str(project_dir)
    assert tree_str == """project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
    great_expectations/
        .gitignore
        great_expectations.yml
        expectations/
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler.json
        notebooks/
            pandas/
                validation_playground.ipynb
            spark/
                validation_playground.ipynb
            sql/
                validation_playground.ipynb
        plugins/
            custom_data_docs/
                renderers/
                styles/
                    data_docs_custom_styles.css
                views/
        uncommitted/
            config_variables.yml
            data_docs/
            samples/
            validations/
                titanic/
                    subdir_reader/
                        Titanic/
                            BasicDatasetProfiler/
                                profiling/
                                    {}.json
""".format(titanic_profiled_batch_id)

    context.profile_datasource("random")
    context.build_data_docs()

    f1_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/random/f1.csv'),
        'datasource': 'random'
    }).to_id()

    f2_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/random/f2.csv'),
        'datasource': 'random'
    }).to_id()

    data_docs_dir = os.path.join(project_dir,
                                 "great_expectations/uncommitted/data_docs")
    observed = gen_directory_tree_str(data_docs_dir)
    assert observed == """\
data_docs/
    local_site/
        index.html
        expectations/
            random/
                subdir_reader/
                    f1/
                        BasicDatasetProfiler.html
                    f2/
                        BasicDatasetProfiler.html
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler.html
        static/
            fonts/
                HKGrotesk/
                    HKGrotesk-Bold.otf
                    HKGrotesk-BoldItalic.otf
                    HKGrotesk-Italic.otf
                    HKGrotesk-Light.otf
                    HKGrotesk-LightItalic.otf
                    HKGrotesk-Medium.otf
                    HKGrotesk-MediumItalic.otf
                    HKGrotesk-Regular.otf
                    HKGrotesk-SemiBold.otf
                    HKGrotesk-SemiBoldItalic.otf
            images/
                favicon.ico
                glossary_scroller.gif
                iterative-dev-loop.png
                logo-long-vector.svg
                logo-long.png
                short-logo-vector.svg
                short-logo.png
                validation_failed_unexpected_values.gif
            styles/
                data_docs_custom_styles_template.css
                data_docs_default_styles.css
        validations/
            random/
                subdir_reader/
                    f1/
                        BasicDatasetProfiler/
                            profiling/
                                {0:s}.html
                    f2/
                        BasicDatasetProfiler/
                            profiling/
                                {1:s}.html
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler/
                            profiling/
                                {2:s}.html
""".format(f1_profiled_batch_id, f2_profiled_batch_id,
           titanic_profiled_batch_id)

    # save data_docs locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/data_docs")

    if os.path.isdir("./tests/data_context/output/data_docs"):
        shutil.rmtree("./tests/data_context/output/data_docs")
    shutil.copytree(os.path.join(ge_directory, "uncommitted/data_docs/"),
                    "./tests/data_context/output/data_docs")
Example #18
def _complete_onboarding(target_dir):
    if click.confirm(COMPLETE_ONBOARDING_PROMPT, default=True):
        DataContext.create(target_dir)
        cli_message(ONBOARDING_COMPLETE)
    else:
        cli_message(RUN_INIT_AGAIN)
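# Hypothetical sketch: driving _complete_onboarding above without interactive
# input by stubbing click.confirm via pytest's monkeypatch. The test name and
# the final assertion are illustrative assumptions, not from the original suite.
def test_complete_onboarding_scaffolds_ge_dir(tmp_path, monkeypatch):
    monkeypatch.setattr(click, "confirm", lambda *args, **kwargs: True)
    _complete_onboarding(str(tmp_path))
    # DataContext.create(target_dir) scaffolds <target_dir>/great_expectations
    assert os.path.isdir(os.path.join(str(tmp_path), "great_expectations"))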
def test_render_full_static_site(tmp_path_factory, filesystem_csv_3):
    project_dir = str(tmp_path_factory.mktemp("project_dir"))
    print(project_dir)

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    curdir = os.path.abspath(os.getcwd())
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))

    os.makedirs(os.path.join(project_dir, "data/random"))
    curdir = os.path.abspath(os.getcwd())
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    scaffold_directories_and_notebooks(ge_directory)
    context.add_datasource("titanic",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/titanic/"))
    context.add_datasource("random",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/random/"))

    context.profile_datasource("titanic")
    glob_str = os.path.join(
        ge_directory,
        "uncommitted/validations/*/titanic/default/Titanic/BasicDatasetProfiler.json"
    )
    print(glob_str)
    glob_result = glob(glob_str)
    os.mkdir(os.path.join(ge_directory, "fixtures/validations"))
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/titanic"))
    os.mkdir(os.path.join(ge_directory,
                          "fixtures/validations/titanic/default"))
    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/titanic/default/Titanic/")
    os.mkdir(full_fixture_path)
    shutil.copy(glob_result[0],
                full_fixture_path + "BasicDatasetProfiler.json")

    context.profile_datasource("random")
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/random"))
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/random/default"))

    glob_str = os.path.join(
        ge_directory,
        "uncommitted/validations/*/random/default/f*/BasicDatasetProfiler.json"
    )
    print(glob_str)
    glob_result = glob(glob_str)

    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/random/default/f1/")
    os.mkdir(full_fixture_path)
    shutil.copy(
        glob_result[0],  # !!! This might switch the f1 and f2 files...
        full_fixture_path + "BasicDatasetProfiler.json")
    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/random/default/f2/")
    os.mkdir(full_fixture_path)
    shutil.copy(
        glob_result[1],  # !!! This might switch the f1 and f2 files...
        full_fixture_path + "BasicDatasetProfiler.json")
    # for g in glob_result:
    #     shutil.copy(
    #         g,
    #         full_fixture_path+"BasicDatasetProfiler.json"
    #     )

    # os.mkdir(os.path.join(ge_directory,"fixtures")
    context.render_full_static_site()

    # Titanic
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "fixtures/validations/titanic/default/Titanic/BasicDatasetProfiler.json"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/titanic/default/Titanic/BasicDatasetProfiler.html"
        ))

    with open(
            os.path.join(
                ge_directory,
                "fixtures/validations/titanic/default/Titanic/BasicDatasetProfiler.json"
            ), "r") as infile:
        titanic_validation = json.load(infile)
    titanic_run_id = titanic_validation['meta']['run_id']
    titanic_validation_html_filename = "{run_id}-BasicDatasetProfiler.html".format(
        run_id=titanic_run_id.replace(':', ''), )
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/titanic/default/Titanic/{filename}".
            format(filename=titanic_validation_html_filename)))
    # f1
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "fixtures/validations/random/default/f1/BasicDatasetProfiler.json")
    )
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f1/BasicDatasetProfiler.html"
        ))

    with open(
            os.path.join(
                ge_directory,
                "fixtures/validations/random/default/f1/BasicDatasetProfiler.json"
            ), "r") as infile:
        f1_validation = json.load(infile)
    f1_run_id = f1_validation['meta']['run_id']
    f1_validation_html_filename = "{run_id}-BasicDatasetProfiler.html".format(
        run_id=f1_run_id.replace(':', ''), )
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f1/{filename}".format(
                filename=f1_validation_html_filename)))
    # f2
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "fixtures/validations/random/default/f2/BasicDatasetProfiler.json")
    )
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f2/BasicDatasetProfiler.html"
        ))

    with open(
            os.path.join(
                ge_directory,
                "fixtures/validations/random/default/f2/BasicDatasetProfiler.json"
            ), "r") as infile:
        f2_validation = json.load(infile)
    f2_run_id = f2_validation['meta']['run_id']
    f2_validation_html_filename = "{run_id}-BasicDatasetProfiler.html".format(
        run_id=f2_run_id.replace(':', ''), )
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f2/{filename}".format(
                filename=f2_validation_html_filename)))

    # full site
    assert os.path.exists(
        os.path.join(ge_directory, "uncommitted/documentation/index.html"))

    # save documentation locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/documentation")

    safe_mmkdir("./tests/data_context/output/documentation/titanic")
    try:
        shutil.copytree(
            os.path.join(ge_directory,
                         "uncommitted/documentation/titanic/default"),
            "./tests/data_context/output/documentation/titanic/default")
    except FileExistsError:
        shutil.rmtree(
            "./tests/data_context/output/documentation/titanic/default")
        shutil.copytree(
            os.path.join(ge_directory,
                         "uncommitted/documentation/titanic/default"),
            "./tests/data_context/output/documentation/titanic/default")

    safe_mmkdir("./tests/data_context/output/documentation/random")
    try:
        shutil.copytree(
            os.path.join(ge_directory,
                         "uncommitted/documentation/random/default"),
            "./tests/data_context/output/documentation/random/default")
    except FileExistsError:
        shutil.rmtree(
            "./tests/data_context/output/documentation/random/default")
        shutil.copytree(
            os.path.join(ge_directory,
                         "uncommitted/documentation/random/default"),
            "./tests/data_context/output/documentation/random/default")

    shutil.copy(
        os.path.join(ge_directory, "uncommitted/documentation/index.html"),
        "./tests/data_context/output/documentation")
@pytest.fixture
def ge_data_context(tmp_path: str) -> DataContext:
    return DataContext.create(tmp_path)
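# Hypothetical usage of the ge_data_context fixture above; the test name and
# assertions are illustrative, not taken from the original suite.
def test_ge_data_context_is_scaffolded(ge_data_context):
    assert isinstance(ge_data_context, DataContext)
    assert os.path.isdir(ge_data_context.root_directory)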
def test_render_full_static_site(tmp_path_factory, filesystem_csv_3):
    project_dir = str(tmp_path_factory.mktemp("project_dir"))
    print(project_dir)

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    curdir = os.path.abspath(os.getcwd())
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))

    os.makedirs(os.path.join(project_dir, "data/random"))
    curdir = os.path.abspath(os.getcwd())
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    scaffold_directories_and_notebooks(ge_directory)
    context.add_datasource("titanic",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/titanic/"))
    context.add_datasource("random",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/random/"))

    context.profile_datasource("titanic")
    glob_str = os.path.join(
        ge_directory,
        "uncommitted/validations/*/titanic/default/Titanic/BasicDatasetProfiler.json"
    )
    print(glob_str)
    glob_result = glob(glob_str)
    os.mkdir(os.path.join(ge_directory, "fixtures/validations"))
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/titanic"))
    os.mkdir(os.path.join(ge_directory,
                          "fixtures/validations/titanic/default"))
    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/titanic/default/Titanic/")
    os.mkdir(full_fixture_path)
    shutil.copy(glob_result[0],
                full_fixture_path + "BasicDatasetProfiler.json")

    context.profile_datasource("random")
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/random"))
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/random/default"))

    glob_str = os.path.join(
        ge_directory,
        "uncommitted/validations/*/random/default/f*/BasicDatasetProfiler.json"
    )
    print(glob_str)
    glob_result = glob(glob_str)

    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/random/default/f1/")
    os.mkdir(full_fixture_path)
    shutil.copy(
        glob_result[0],  # !!! This might switch the f1 and f2 files...
        full_fixture_path + "BasicDatasetProfiler.json")
    full_fixture_path = os.path.join(
        ge_directory, "fixtures/validations/random/default/f2/")
    os.mkdir(full_fixture_path)
    shutil.copy(
        glob_result[1],  # !!! This might switch the f1 and f2 files...
        full_fixture_path + "BasicDatasetProfiler.json")
    # for g in glob_result:
    #     shutil.copy(
    #         g,
    #         full_fixture_path+"BasicDatasetProfiler.json"
    #     )

    # os.mkdir(os.path.join(ge_directory,"fixtures")
    context.render_full_static_site()

    assert os.path.exists(
        os.path.join(
            ge_directory,
            "fixtures/validations/titanic/default/Titanic/BasicDatasetProfiler.json"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/titanic/default/Titanic/BasicDatasetProfiler.html"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f1/BasicDatasetProfiler.html"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/random/default/f2/BasicDatasetProfiler.html"
        ))

    # Store output files locally
    # shutil.copy(
    #     os.path.join(
    #         ge_directory,
    #         "uncommitted/documentation/random/default/f2/BasicDatasetProfiler.html"
    #     ),
    #     "test_output/f2_BasicDatasetProfiler.html"

    # )

    with open(
            os.path.join(
                ge_directory,
                "uncommitted/documentation/titanic/default/Titanic/BasicDatasetProfiler.html"
            ), 'r') as f:
        # print(f.read())
        pass

    assert os.path.exists(
        os.path.join(ge_directory, "uncommitted/documentation/index.html"))
Example #22
def test_render_full_static_site(tmp_path_factory, filesystem_csv_3):
    project_dir = str(tmp_path_factory.mktemp("project_dir"))
    print(project_dir)

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    curdir = os.path.abspath(os.getcwd())
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))

    os.makedirs(os.path.join(project_dir, "data/random"))
    curdir = os.path.abspath(os.getcwd())
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    scaffold_directories_and_notebooks(ge_directory)
    context.add_datasource("titanic",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/titanic/"))
    context.add_datasource("random",
                           "pandas",
                           base_directory=os.path.join(project_dir,
                                                       "data/random/"))

    context.profile_datasource("titanic")

    context.profile_datasource("random")

    context.build_data_documentation()

    # Titanic

    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/validations/profiling/titanic/default/Titanic/BasicDatasetProfiler.json"
        ))

    assert os.path.exists(
        os.path.join(  # profiling results HTML
            ge_directory,
            "uncommitted/documentation/local_site/profiling/titanic/default/Titanic/BasicDatasetProfiler.html"
        ))

    assert os.path.exists(
        os.path.join(  # profiling expectations HTML
            ge_directory,
            "uncommitted/documentation/local_site/expectations/titanic/default/Titanic/BasicDatasetProfiler.html"
        ))

    # f1

    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/validations/profiling/random/default/f1/BasicDatasetProfiler.json"
        ))
    assert os.path.exists(
        os.path.join(  # profiling results HTML
            ge_directory,
            "uncommitted/documentation/local_site/profiling/random/default/f1/BasicDatasetProfiler.html"
        ))

    assert os.path.exists(
        os.path.join(  # profiling expectations HTML
            ge_directory,
            "uncommitted/documentation/local_site/expectations/random/default/f1/BasicDatasetProfiler.html"
        ))

    # f2

    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/validations/profiling/random/default/f2/BasicDatasetProfiler.json"
        ))
    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/local_site/profiling/random/default/f2/BasicDatasetProfiler.html"
        ))

    assert os.path.exists(
        os.path.join(
            ge_directory,
            "uncommitted/documentation/local_site/expectations/random/default/f2/BasicDatasetProfiler.html"
        ))

    # local_site index.html
    assert os.path.exists(
        os.path.join(ge_directory,
                     "uncommitted/documentation/local_site/index.html"))

    # team_site index.html
    assert os.path.exists(
        os.path.join(ge_directory,
                     "uncommitted/documentation/team_site/index.html"))

    # save documentation locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/documentation")

    if os.path.isdir("./tests/data_context/output/documentation"):
        shutil.rmtree("./tests/data_context/output/documentation")
    shutil.copytree(os.path.join(ge_directory, "uncommitted/documentation/"),
                    "./tests/data_context/output/documentation")