def test_subdir_reader_file_partitioning(tmp_path_factory):
    base_directory = str(tmp_path_factory.mktemp("test_folder_connection_path"))
    mock_files = [
        "20190101__asset_1.csv",
        "20190102__asset_1.csv",
        "20190103__asset_1.csv",
        "asset_2/20190101__asset_2.csv",
        "asset_2/20190102__asset_2.csv",
    ]
    for file in mock_files:
        if "/" in file:
            safe_mmkdir(os.path.join(base_directory, file.split("/")[0]))
        open(os.path.join(base_directory, file), "w").close()

    # If we have files, we should see them as individual assets
    subdir_reader_generator = SubdirReaderGenerator("test_generator", base_directory=base_directory)
    known_assets = subdir_reader_generator.get_available_data_asset_names()
    assert set(known_assets) == {
        "20190101__asset_1",
        "20190102__asset_1",
        "20190103__asset_1",
        "asset_2",
    }

    # SubdirReaderGenerator uses the filename as partition name for root files
    known_partitions = subdir_reader_generator.get_available_partition_ids("20190101__asset_1")
    assert set(known_partitions) == {"20190101__asset_1"}
    kwargs = subdir_reader_generator.build_batch_kwargs_from_partition_id(
        "20190101__asset_1", "20190101__asset_1")
    assert kwargs["path"] == os.path.join(base_directory, "20190101__asset_1.csv")
def data_context(tmp_path_factory):
    # This data_context is *manually* created to have the config we want, vs created with DataContext.create
    project_path = str(tmp_path_factory.mktemp('data_context'))
    context_path = os.path.join(project_path, "great_expectations")
    asset_config_path = os.path.join(context_path, "expectations")
    safe_mmkdir(os.path.join(asset_config_path, "mydatasource/mygenerator/my_dag_node"), exist_ok=True)
    shutil.copy("./tests/test_fixtures/great_expectations_basic.yml",
                str(os.path.join(context_path, "great_expectations.yml")))
    shutil.copy(
        "./tests/test_fixtures/expectation_suites/parameterized_expectation_suite_fixture.json",
        os.path.join(asset_config_path, "mydatasource/mygenerator/my_dag_node/default.json"))
    safe_mmkdir(os.path.join(context_path, "plugins"))
    shutil.copy(
        "./tests/test_fixtures/custom_pandas_dataset.py",
        str(os.path.join(context_path, "plugins", "custom_pandas_dataset.py")))
    shutil.copy(
        "./tests/test_fixtures/custom_sqlalchemy_dataset.py",
        str(os.path.join(context_path, "plugins", "custom_sqlalchemy_dataset.py")))
    shutil.copy(
        "./tests/test_fixtures/custom_sparkdf_dataset.py",
        str(os.path.join(context_path, "plugins", "custom_sparkdf_dataset.py")))
    return ge.data_context.DataContext(context_path)
def empty_data_context(tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp('empty_data_context'))
    context = ge.data_context.DataContext.create(project_path)
    context_path = os.path.join(project_path, "great_expectations")
    asset_config_path = os.path.join(context_path, "expectations")
    safe_mmkdir(asset_config_path, exist_ok=True)
    return context
def __init__(self, base_directory, filepath_template=None, filepath_prefix=None,
             filepath_suffix=None, forbidden_substrings=None,
             platform_specific_separator=True, root_directory=None,
             fixed_length_key=False):
    super().__init__(
        filepath_template=filepath_template,
        filepath_prefix=filepath_prefix,
        filepath_suffix=filepath_suffix,
        forbidden_substrings=forbidden_substrings,
        platform_specific_separator=platform_specific_separator,
        fixed_length_key=fixed_length_key)

    if os.path.isabs(base_directory):
        self.full_base_directory = base_directory
    else:
        if root_directory is None:
            raise ValueError(
                "base_directory must be an absolute path if root_directory is not provided")
        elif not os.path.isabs(root_directory):
            raise ValueError(
                "root_directory must be an absolute path. Got {0} instead.".format(root_directory))
        else:
            self.full_base_directory = os.path.join(root_directory, base_directory)

    safe_mmkdir(str(os.path.dirname(self.full_base_directory)))
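# Hypothetical standalone sketch of the path-resolution rule the __init__ above
# implements (resolve_full_base_directory is illustrative, not a GE API): an
# absolute base_directory is used as-is; a relative one must be joined onto an
# absolute root_directory.
def resolve_full_base_directory(base_directory, root_directory=None):
    """Return the absolute directory the store backend will write under."""
    if os.path.isabs(base_directory):
        return base_directory
    if root_directory is None:
        raise ValueError(
            "base_directory must be an absolute path if root_directory is not provided")
    if not os.path.isabs(root_directory):
        raise ValueError(
            "root_directory must be an absolute path. Got {0} instead.".format(root_directory))
    return os.path.join(root_directory, base_directory)

assert resolve_full_base_directory("/tmp/store") == "/tmp/store"
assert resolve_full_base_directory(
    "uncommitted/validations", "/project/great_expectations"
) == "/project/great_expectations/uncommitted/validations"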
def test_load_config_variables_file(basic_data_context_config, tmp_path_factory):
    # Setup:
    base_path = str(tmp_path_factory.mktemp('test_load_config_variables_file'))
    safe_mmkdir(os.path.join(base_path, "uncommitted"))
    with open(os.path.join(base_path, "uncommitted", "dev_variables.yml"), "w") as outfile:
        yaml.dump({'env': 'dev'}, outfile)
    with open(os.path.join(base_path, "uncommitted", "prod_variables.yml"), "w") as outfile:
        yaml.dump({'env': 'prod'}, outfile)
    basic_data_context_config["config_variables_file_path"] = \
        "uncommitted/${TEST_CONFIG_FILE_ENV}_variables.yml"

    try:
        # We should be able to load different files based on an environment variable
        os.environ["TEST_CONFIG_FILE_ENV"] = "dev"
        context = BaseDataContext(basic_data_context_config, context_root_dir=base_path)
        config_vars = context._load_config_variables_file()
        assert config_vars['env'] == 'dev'
        os.environ["TEST_CONFIG_FILE_ENV"] = "prod"
        context = BaseDataContext(basic_data_context_config, context_root_dir=base_path)
        config_vars = context._load_config_variables_file()
        assert config_vars['env'] == 'prod'
    except Exception:
        raise
    finally:
        # Make sure we unset the environment variable we're using
        del os.environ["TEST_CONFIG_FILE_ENV"]
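# The test above depends on GE substituting ${VAR} references in
# config_variables_file_path from the environment. A minimal sketch of that
# substitution pattern (substitute_env_vars is illustrative, not the actual
# GE helper):
import re

def substitute_env_vars(template):
    """Replace each ${VAR} occurrence with the value of environment variable VAR."""
    return re.sub(r"\$\{(\w+)\}", lambda match: os.environ[match.group(1)], template)

os.environ["TEST_CONFIG_FILE_ENV"] = "dev"
assert substitute_env_vars(
    "uncommitted/${TEST_CONFIG_FILE_ENV}_variables.yml") == "uncommitted/dev_variables.yml"
del os.environ["TEST_CONFIG_FILE_ENV"]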
def titanic_data_context(tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp('titanic_data_context'))
    context_path = os.path.join(project_path, "great_expectations")
    safe_mmkdir(os.path.join(context_path, "expectations"), exist_ok=True)
    data_path = os.path.join(context_path, "../data")
    safe_mmkdir(os.path.join(data_path), exist_ok=True)
    shutil.copy("./tests/test_fixtures/great_expectations_titanic.yml",
                str(os.path.join(context_path, "great_expectations.yml")))
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(context_path, "../data/Titanic.csv")))
    return ge.data_context.DataContext(context_path)
def test_get_available_data_asset_names_for_query_path(empty_data_context):
    # create queries path
    context_path = empty_data_context.root_directory
    query_path = os.path.join(context_path, "datasources/mydatasource/generators/mygenerator")
    safe_mmkdir(query_path)
    shutil.copy(file_relative_path(__file__, "../../test_fixtures/dummy.sql"), query_path)

    data_source = Datasource(name="mydatasource", data_context=empty_data_context)
    generator = QueryBatchKwargsGenerator(name="mygenerator", datasource=data_source)
    sql_list = generator.get_available_data_asset_names()
    assert ("dummy", "query") in sql_list["names"]
def test_opt_out_yml(tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp("data_context"))
    context_path = os.path.join(project_path, "great_expectations")
    safe_mmkdir(context_path, exist_ok=True)
    fixture_dir = file_relative_path(__file__, "../../test_fixtures")
    shutil.copy(
        os.path.join(fixture_dir, "great_expectations_basic_with_usage_stats_disabled.yml"),
        str(os.path.join(context_path, "great_expectations.yml")),
    )
    assert DataContext(
        context_root_dir=context_path
    )._project_config.anonymous_usage_statistics.enabled is False
def filesystem_csv(tmp_path_factory):
    base_dir = tmp_path_factory.mktemp('filesystem_csv')
    base_dir = str(base_dir)

    # Put a few files in the directory
    with open(os.path.join(base_dir, "f1.csv"), "w") as outfile:
        outfile.writelines(["a,b,c\n"])
    with open(os.path.join(base_dir, "f2.csv"), "w") as outfile:
        outfile.writelines(["a,b,c\n"])
    safe_mmkdir(os.path.join(base_dir, "f3"))
    with open(os.path.join(base_dir, "f3", "f3_20190101.csv"), "w") as outfile:
        outfile.writelines(["a,b,c\n"])
    with open(os.path.join(base_dir, "f3", "f3_20190102.csv"), "w") as outfile:
        outfile.writelines(["a,b,c\n"])
    return base_dir
def test_load_data_context_from_environment_variables(tmp_path_factory):
    try:
        project_path = str(tmp_path_factory.mktemp('data_context'))
        context_path = os.path.join(project_path, "great_expectations")
        safe_mmkdir(context_path)
        shutil.copy("./tests/test_fixtures/great_expectations_basic.yml",
                    str(os.path.join(context_path, "great_expectations.yml")))
        with pytest.raises(DataContextError) as err:
            DataContext.find_context_root_dir()
        assert "Unable to locate context root directory." in str(err.value)

        os.environ["GE_HOME"] = context_path
        assert DataContext.find_context_root_dir() == context_path
    except Exception:
        raise
    finally:
        # Make sure we unset the environment variable we're using
        if "GE_HOME" in os.environ:
            del os.environ["GE_HOME"]
def _set(self, key, value, **kwargs):
    if not isinstance(key, tuple):
        key = key.to_tuple()
    filepath = os.path.join(self.full_base_directory, self._convert_key_to_filepath(key))
    path, filename = os.path.split(filepath)

    safe_mmkdir(str(path))
    with open(filepath, "wb") as outfile:
        if isinstance(value, string_types):
            # Following try/except is to support py2, since both str and bytes objects pass above condition
            try:
                outfile.write(value.encode("utf-8"))
            except UnicodeDecodeError:
                outfile.write(value)
        else:
            outfile.write(value)
    return filepath
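# The string_types branch above exists because under Python 2 both str and
# unicode pass the isinstance check, and a py2 str holding non-ASCII bytes
# raises UnicodeDecodeError when re-encoded. A self-contained sketch of the
# same write-as-UTF-8 pattern (to_utf8_bytes is illustrative):
from six import string_types

def to_utf8_bytes(value):
    """Return value as bytes, encoding text and passing through raw bytes."""
    if isinstance(value, string_types):
        try:
            return value.encode("utf-8")
        except UnicodeDecodeError:
            # py2 str that is already encoded: write it unchanged
            return value
    return value

assert to_utf8_bytes(u"caf\xe9") == b"caf\xc3\xa9"
assert to_utf8_bytes(b"raw bytes") == b"raw bytes"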
def test_subdir_reader_file_partitioning(basic_pandas_datasource, tmp_path_factory):
    base_directory = str(tmp_path_factory.mktemp("test_folder_connection_path"))
    mock_files = [
        "20190101__asset_1.csv",
        "20190102__asset_1.csv",
        "20190103__asset_1.csv",
        "asset_2/20190101__asset_2.csv",
        "asset_2/20190102__asset_2.csv",
    ]
    for file in mock_files:
        if "/" in file:
            safe_mmkdir(os.path.join(base_directory, file.split("/")[0]))
        open(os.path.join(base_directory, file), "w").close()

    # If we have files, we should see them as individual assets
    subdir_reader_generator = SubdirReaderBatchKwargsGenerator(
        "test_generator",
        datasource=basic_pandas_datasource,
        base_directory=base_directory)
    known_assets = subdir_reader_generator.get_available_data_asset_names()["names"]
    assert set(known_assets) == {
        ("20190101__asset_1", "file"),
        ("20190102__asset_1", "file"),
        ("20190103__asset_1", "file"),
        ("asset_2", "directory"),
    }

    # SubdirReaderBatchKwargsGenerator uses the filename as partition name for root files
    known_partitions = subdir_reader_generator.get_available_partition_ids("20190101__asset_1")
    assert set(known_partitions) == {"20190101__asset_1"}
    kwargs = subdir_reader_generator.build_batch_kwargs("20190101__asset_1", "20190101__asset_1")
    assert kwargs["path"] == os.path.join(base_directory, "20190101__asset_1.csv")

    # We should also be able to pass a limit
    kwargs = subdir_reader_generator.build_batch_kwargs(
        "20190101__asset_1", "20190101__asset_1", limit=10)
    assert kwargs["path"] == os.path.join(base_directory, "20190101__asset_1.csv")
    assert kwargs["reader_options"]["nrows"] == 10
def test_get_available_data_asset_names_for_query_path(empty_data_context):
    # create queries path
    context_path = empty_data_context.root_directory
    safe_mmkdir(os.path.join(
        context_path, "datasources/mydatasource/generators/mygenerator/queries"))
    shutil.copy(
        "./tests/test_fixtures/dummy.sql",
        str(os.path.join(context_path, "datasources", "mydatasource",
                         "generators", "mygenerator", "queries")))

    data_source = Datasource(name="mydatasource", data_context=empty_data_context)
    generator = QueryGenerator(name="mygenerator", datasource=data_source)
    sql_list = generator.get_available_data_asset_names()
    assert "dummy" in sql_list
def test_subdir_reader_configurable_reader_method(basic_pandas_datasource, tmp_path_factory):
    base_directory = str(tmp_path_factory.mktemp("test_folder_connection_path"))
    mock_files = [
        "20190101__asset_1.dat",
        "20190102__asset_1.dat",
        "20190103__asset_1.dat",
        "asset_2/20190101__asset_2.dat",
        "asset_2/20190102__asset_2.dat",
    ]
    for file in mock_files:
        if "/" in file:
            safe_mmkdir(os.path.join(base_directory, file.split("/")[0]))
        open(os.path.join(base_directory, file), "w").close()

    # If we have files, we should see them as individual assets
    subdir_reader_generator = SubdirReaderBatchKwargsGenerator(
        "test_generator",
        datasource=basic_pandas_datasource,
        base_directory=base_directory,
        reader_method='csv',
        known_extensions=['.dat'])
    batch_kwargs = next(subdir_reader_generator.get_iterator('asset_2'))
    assert batch_kwargs['reader_method'] == 'csv'
def test_opt_out_env_var_overrides_yml(tmp_path_factory, monkeypatch):
    project_path = str(tmp_path_factory.mktemp("data_context"))
    context_path = os.path.join(project_path, "great_expectations")
    safe_mmkdir(context_path, exist_ok=True)
    fixture_dir = file_relative_path(__file__, "../../test_fixtures")
    shutil.copy(
        os.path.join(fixture_dir, "great_expectations_basic_with_usage_stats_enabled.yml"),
        str(os.path.join(context_path, "great_expectations.yml")),
    )
    assert DataContext(
        context_root_dir=context_path
    )._project_config.anonymous_usage_statistics.enabled is True

    monkeypatch.setenv("GE_USAGE_STATS", "False")
    context = DataContext(context_root_dir=context_path)
    project_config = context._project_config
    assert project_config.anonymous_usage_statistics.enabled is False
def scaffold_directories_and_notebooks(base_dir):
    """Add basic directories for an initial, opinionated GE project."""
    safe_mmkdir(base_dir, exist_ok=True)
    notebook_dir_name = "notebooks"

    open(os.path.join(base_dir, ".gitignore"), 'w').write("uncommitted/")

    for directory in [notebook_dir_name, "expectations", "datasources",
                      "uncommitted", "plugins", "fixtures"]:
        safe_mmkdir(os.path.join(base_dir, directory), exist_ok=True)

    for uncommitted_directory in ["validations", "credentials", "documentation", "samples"]:
        safe_mmkdir(os.path.join(base_dir, "uncommitted", uncommitted_directory), exist_ok=True)

    for notebook in glob.glob(script_relative_path("../init_notebooks/*.ipynb")):
        notebook_name = os.path.basename(notebook)
        shutil.copyfile(notebook, os.path.join(base_dir, notebook_dir_name, notebook_name))
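# Example use of the scaffold above on a throwaway directory; the assertions
# follow directly from the directory lists it iterates over (the tempfile
# usage here is illustrative):
import tempfile

scaffold_base = tempfile.mkdtemp()
scaffold_directories_and_notebooks(scaffold_base)
assert os.path.isdir(os.path.join(scaffold_base, "uncommitted", "validations"))
assert open(os.path.join(scaffold_base, ".gitignore")).read() == "uncommitted/"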
def test_load_data_context_from_environment_variables(tmp_path_factory):
    curdir = os.path.abspath(os.getcwd())
    try:
        project_path = str(tmp_path_factory.mktemp('data_context'))
        context_path = os.path.join(project_path, "great_expectations")
        safe_mmkdir(context_path)
        os.chdir(context_path)
        with pytest.raises(DataContextError) as err:
            DataContext.find_context_root_dir()
        assert isinstance(err.value, ConfigNotFoundError)

        shutil.copy(file_relative_path(__file__, "../test_fixtures/great_expectations_basic.yml"),
                    str(os.path.join(context_path, "great_expectations.yml")))
        os.environ["GE_HOME"] = context_path
        assert DataContext.find_context_root_dir() == context_path
    except Exception:
        raise
    finally:
        # Make sure we unset the environment variable we're using
        if "GE_HOME" in os.environ:
            del os.environ["GE_HOME"]
        os.chdir(curdir)
def test_opt_out_etc_overrides_yml(tmp_path_factory):
    home_config_dir = tmp_path_factory.mktemp("home_dir")
    home_config_dir = str(home_config_dir)
    etc_config_dir = tmp_path_factory.mktemp("etc")
    etc_config_dir = str(etc_config_dir)
    config_dirs = [home_config_dir, etc_config_dir]
    config_dirs = [os.path.join(config_dir, "great_expectations.conf")
                   for config_dir in config_dirs]

    disabled_config = configparser.ConfigParser()
    disabled_config["anonymous_usage_statistics"] = {"enabled": False}
    with open(os.path.join(etc_config_dir, "great_expectations.conf"), 'w') as configfile:
        disabled_config.write(configfile)

    project_path = str(tmp_path_factory.mktemp("data_context"))
    context_path = os.path.join(project_path, "great_expectations")
    safe_mmkdir(context_path, exist_ok=True)
    fixture_dir = file_relative_path(__file__, "../../test_fixtures")
    shutil.copy(
        os.path.join(fixture_dir, "great_expectations_basic_with_usage_stats_enabled.yml"),
        str(os.path.join(context_path, "great_expectations.yml")),
    )
    assert DataContext(
        context_root_dir=context_path
    )._project_config.anonymous_usage_statistics.enabled is True

    with mock.patch(
            "great_expectations.data_context.BaseDataContext.GLOBAL_CONFIG_PATHS",
            config_dirs):
        context = DataContext(context_root_dir=context_path)
        project_config = context._project_config
        assert project_config.anonymous_usage_statistics.enabled is False
def test_safe_mmkdir(tmp_path_factory):
    project_path = str(tmp_path_factory.mktemp('empty_dir'))
    first_path = os.path.join(project_path, "first_path")

    safe_mmkdir(first_path)
    assert os.path.isdir(first_path)

    with pytest.raises(TypeError):
        safe_mmkdir(1)

    # This should only trigger under python 2
    if six.PY2:
        with pytest.raises(TypeError) as e:
            next_project_path = tmp_path_factory.mktemp('test_safe_mmkdir__dir_b')
            safe_mmkdir(next_project_path)
        assert e.value.message == "directory must be of type str, not {'directory_type': \"<class 'pathlib2.PosixPath'>\"}"
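# The test above pins down safe_mmkdir's contract: create the directory if it is
# missing, tolerate it already existing, and reject non-str arguments. A minimal
# sketch consistent with that contract (the real implementation may differ):
import errno

def safe_mmkdir_sketch(directory, exist_ok=True):
    """mkdir -p that insists on a str path; matches the TypeError message tested above."""
    if not isinstance(directory, six.string_types):
        raise TypeError("directory must be of type str, not {0}".format(
            {"directory_type": str(type(directory))}))
    try:
        os.makedirs(directory)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise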
def test_configuration_driven_site_builder(site_builder_data_context_with_html_store_titanic_random):
    context = site_builder_data_context_with_html_store_titanic_random

    context.add_validation_operator(
        "validate_and_store",
        {
            "class_name": "ActionListValidationOperator",
            "action_list": [{
                "name": "store_validation_result",
                "action": {
                    "class_name": "StoreAction",
                    "target_store_name": "validations_store",
                }
            }, {
                "name": "extract_and_store_eval_parameters",
                "action": {
                    "class_name": "ExtractAndStoreEvaluationParamsAction",
                    "target_store_name": "evaluation_parameter_store",
                }
            }]
        }
    )

    # profiling the Titanic datasource will generate one expectation suite and one validation
    # that is a profiling result
    context.profile_datasource(context.list_datasources()[0]["name"])

    # creating another validation result using the profiler's suite (no need to use a new expectation suite
    # for this test). having two validation results - one with run id "profiling" - allows us to test
    # the logic of run_id_filter that helps filtering validation results to be included in
    # the profiling and the validation sections.
    batch = context.get_batch('Titanic',
                              expectation_suite_name='BasicDatasetProfiler',
                              batch_kwargs=context.yield_batch_kwargs('Titanic'))
    run_id = "test_run_id_12345"
    context.run_validation_operator(
        assets_to_validate=[batch],
        run_id=run_id,
        validation_operator_name="validate_and_store",
    )

    data_docs_config = context._project_config.get('data_docs_sites')
    local_site_config = data_docs_config['local_site']
    # local_site_config.pop('module_name')  # This isn't necessary
    local_site_config.pop('class_name')

    # set datasource_whitelist
    local_site_config['datasource_whitelist'] = ['titanic']

    keys_as_strings = [x.to_string() for x in context.stores["validations_store"].list_keys()]
    assert set(keys_as_strings) == set([
        "ValidationResultIdentifier.titanic.default.Titanic.BasicDatasetProfiler.test_run_id_12345",
        "ValidationResultIdentifier.titanic.default.Titanic.BasicDatasetProfiler.profiling",
        "ValidationResultIdentifier.random.default.f2.BasicDatasetProfiler.profiling",
        "ValidationResultIdentifier.random.default.f1.BasicDatasetProfiler.profiling",
    ])

    site_builder = SiteBuilder(
        data_context=context,
        **local_site_config
    )
    res = site_builder.build()

    index_page_locator_info = res[0]
    index_links_dict = res[1]

    print(json.dumps(index_page_locator_info, indent=2))
    assert index_page_locator_info == context.root_directory + '/uncommitted/data_docs/local_site/index.html'

    print(json.dumps(index_links_dict, indent=2))
    assert json.loads(json.dumps(index_links_dict)) == json.loads("""\
{
  "titanic": {
    "default": {
      "Titanic": {
        "profiling_links": [
          {
            "full_data_asset_name": "titanic/default/Titanic",
            "expectation_suite_name": "BasicDatasetProfiler",
            "filepath": "validations/profiling/titanic/default/Titanic/BasicDatasetProfiler.html",
            "source": "titanic",
            "generator": "default",
            "asset": "Titanic",
            "run_id": "profiling",
            "validation_success": false
          }
        ],
        "validations_links": [
          {
            "full_data_asset_name": "titanic/default/Titanic",
            "expectation_suite_name": "BasicDatasetProfiler",
            "filepath": "validations/test_run_id_12345/titanic/default/Titanic/BasicDatasetProfiler.html",
            "source": "titanic",
            "generator": "default",
            "asset": "Titanic",
            "run_id": "test_run_id_12345",
            "validation_success": false
          }
        ],
        "expectations_links": [
          {
            "full_data_asset_name": "titanic/default/Titanic",
            "expectation_suite_name": "BasicDatasetProfiler",
            "filepath": "expectations/titanic/default/Titanic/BasicDatasetProfiler.html",
            "source": "titanic",
            "generator": "default",
            "asset": "Titanic",
            "run_id": null,
            "validation_success": null
          }
        ]
      }
    }
  }
}
""")

    assert "random" not in index_links_dict, \
        """`random` must not appear in this documentation,
        because `datasource_whitelist` config option specifies only `titanic`"""

    assert len(index_links_dict['titanic']['default']['Titanic']['validations_links']) == 1, \
        """The only rendered validation should be the one not generated by the profiler"""

    # save documentation locally
    safe_mmkdir("./tests/render/output")
    safe_mmkdir("./tests/render/output/documentation")

    if os.path.isdir("./tests/render/output/documentation"):
        shutil.rmtree("./tests/render/output/documentation")
    shutil.copytree(
        os.path.join(
            site_builder_data_context_with_html_store_titanic_random.root_directory,
            "uncommitted/data_docs/"),
        "./tests/render/output/documentation")

    # let's create another validation result and run the site builder to add it
    # to the data docs
    # the operator does not have a StoreAction action configured, so the site
    # will not be updated without our call to site builder
    ts_last_mod_0 = os.path.getmtime(os.path.join(
        site_builder.site_index_builder.target_store.store_backends[
            ValidationResultIdentifier].full_base_directory,
        "validations/test_run_id_12345/titanic/default/Titanic/BasicDatasetProfiler.html"))

    run_id = "test_run_id_12346"
    operator_result = context.run_validation_operator(
        assets_to_validate=[batch],
        run_id=run_id,
        validation_operator_name="validate_and_store",
    )

    validation_result_id = ValidationResultIdentifier(
        expectation_suite_identifier=[key for key in operator_result["details"].keys()][0],
        run_id=run_id)
    res = site_builder.build(resource_identifiers=[validation_result_id])

    index_links_dict = res[1]

    # verify that an additional validation result HTML file was generated
    assert len(index_links_dict["titanic"]["default"]["Titanic"]["validations_links"]) == 2

    # verify that the validation result HTML file rendered in the previous run was NOT updated
    ts_last_mod_1 = os.path.getmtime(os.path.join(
        site_builder.site_index_builder.target_store.store_backends[
            ValidationResultIdentifier].full_base_directory,
        "validations/test_run_id_12345/titanic/default/Titanic/BasicDatasetProfiler.html"))
    assert ts_last_mod_0 == ts_last_mod_1
def test_render_full_static_site(tmp_path_factory, filesystem_csv_3):
    project_dir = str(tmp_path_factory.mktemp("project_dir"))

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))

    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    scaffold_directories_and_notebooks(ge_directory)
    context.add_datasource("titanic", "pandas",
                           base_directory=os.path.join(project_dir, "data/titanic/"))
    context.add_datasource("random", "pandas",
                           base_directory=os.path.join(project_dir, "data/random/"))

    context.profile_datasource("titanic")
    context.profile_datasource("random")
    context.build_data_documentation()

    # Titanic
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/validations/profiling/titanic/default/Titanic/BasicDatasetProfiler.json"))
    # profiling results HTML
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/documentation/local_site/profiling/titanic/default/Titanic/BasicDatasetProfiler.html"))
    # profiling expectations HTML
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/documentation/local_site/expectations/titanic/default/Titanic/BasicDatasetProfiler.html"))

    # f1
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/validations/profiling/random/default/f1/BasicDatasetProfiler.json"))
    # profiling results HTML
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/documentation/local_site/profiling/random/default/f1/BasicDatasetProfiler.html"))
    # profiling expectations HTML
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/documentation/local_site/expectations/random/default/f1/BasicDatasetProfiler.html"))

    # f2
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/validations/profiling/random/default/f2/BasicDatasetProfiler.json"))
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/documentation/local_site/profiling/random/default/f2/BasicDatasetProfiler.html"))
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/documentation/local_site/expectations/random/default/f2/BasicDatasetProfiler.html"))

    # local_site index.html
    assert os.path.exists(os.path.join(
        ge_directory, "uncommitted/documentation/local_site/index.html"))

    # team_site index.html
    assert os.path.exists(os.path.join(
        ge_directory, "uncommitted/documentation/team_site/index.html"))

    # save documentation locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/documentation")

    if os.path.isdir("./tests/data_context/output/documentation"):
        shutil.rmtree("./tests/data_context/output/documentation")
    shutil.copytree(os.path.join(ge_directory, "uncommitted/documentation/"),
                    "./tests/data_context/output/documentation")
def test_configuration_driven_site_builder(site_builder_data_context_with_html_store_titanic_random):
    context = site_builder_data_context_with_html_store_titanic_random

    context.add_validation_operator(
        "validate_and_store",
        {
            "class_name": "ActionListValidationOperator",
            "action_list": [{
                "name": "store_validation_result",
                "action": {
                    "class_name": "StoreValidationResultAction",
                    "target_store_name": "validations_store",
                }
            }, {
                "name": "extract_and_store_eval_parameters",
                "action": {
                    "class_name": "StoreEvaluationParametersAction",
                    "target_store_name": "evaluation_parameter_store",
                }
            }]
        })

    # profiling the Titanic datasource will generate one expectation suite and one validation
    # that is a profiling result
    datasource_name = 'titanic'
    data_asset_name = "Titanic"
    profiler_name = 'BasicDatasetProfiler'
    generator_name = "subdir_reader"
    context.profile_datasource(datasource_name)

    # creating another validation result using the profiler's suite (no need to use a new expectation suite
    # for this test). having two validation results - one with run id "profiling" - allows us to test
    # the logic of run_id_filter that helps filtering validation results to be included in
    # the profiling and the validation sections.
    batch_kwargs = context.build_batch_kwargs(
        datasource=datasource_name,
        generator=generator_name,
        name=data_asset_name)
    expectation_suite_name = "{}.{}.{}.{}".format(
        datasource_name, generator_name, data_asset_name, profiler_name)
    batch = context.get_batch(
        batch_kwargs=batch_kwargs,
        expectation_suite_name=expectation_suite_name,
    )
    run_id = "test_run_id_12345"
    context.run_validation_operator(
        assets_to_validate=[batch],
        run_id=run_id,
        validation_operator_name="validate_and_store",
    )

    data_docs_config = context._project_config.data_docs_sites
    local_site_config = data_docs_config['local_site']
    # local_site_config.pop('module_name')  # This isn't necessary
    local_site_config.pop('class_name')

    validations_set = set(context.stores["validations_store"].list_keys())
    assert len(validations_set) == 4
    assert ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_name),
        run_id="test_run_id_12345",
        batch_identifier=batch.batch_id) in validations_set
    assert ValidationResultIdentifier(
        expectation_suite_identifier=ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_name),
        run_id="profiling",
        batch_identifier=batch.batch_id) in validations_set

    site_builder = SiteBuilder(
        data_context=context,
        runtime_environment={"root_directory": context.root_directory},
        **local_site_config)
    res = site_builder.build()

    index_page_locator_info = res[0]
    index_links_dict = res[1]

    # assert that how-to buttons and related elements are rendered (default behavior)
    assert_how_to_buttons(context, index_page_locator_info, index_links_dict)

    print(json.dumps(index_page_locator_info, indent=2))
    assert index_page_locator_info == context.root_directory + '/uncommitted/data_docs/local_site/index.html'

    print(json.dumps(index_links_dict, indent=2))
    assert "site_name" in index_links_dict

    assert "expectations_links" in index_links_dict
    assert len(index_links_dict["expectations_links"]) == 3

    assert "validations_links" in index_links_dict
    assert len(index_links_dict["validations_links"]) == 1, \
        """The only rendered validation should be the one not generated by the profiler"""

    assert "profiling_links" in index_links_dict
    assert len(index_links_dict["profiling_links"]) == 3

    # save documentation locally
    safe_mmkdir("./tests/render/output")
    safe_mmkdir("./tests/render/output/documentation")

    if os.path.isdir("./tests/render/output/documentation"):
        shutil.rmtree("./tests/render/output/documentation")
    shutil.copytree(
        os.path.join(
            site_builder_data_context_with_html_store_titanic_random.root_directory,
            "uncommitted/data_docs/"),
        "./tests/render/output/documentation")

    # let's create another validation result and run the site builder to add it
    # to the data docs
    # the operator does not have a StoreValidationResultAction action configured, so the site
    # will not be updated without our call to site builder
    expectation_suite_path_component = expectation_suite_name.replace('.', '/')
    validation_result_page_path = os.path.join(
        site_builder.site_index_builder.target_store.store_backends[
            ValidationResultIdentifier].full_base_directory,
        "validations",
        expectation_suite_path_component,
        run_id,
        batch.batch_id + ".html")

    ts_last_mod_0 = os.path.getmtime(validation_result_page_path)

    run_id = "test_run_id_12346"
    operator_result = context.run_validation_operator(
        assets_to_validate=[batch],
        run_id=run_id,
        validation_operator_name="validate_and_store",
    )

    validation_result_id = ValidationResultIdentifier(
        expectation_suite_identifier=[key for key in operator_result["details"].keys()][0],
        run_id=run_id,
        batch_identifier=batch.batch_id)
    res = site_builder.build(resource_identifiers=[validation_result_id])

    index_links_dict = res[1]

    # verify that an additional validation result HTML file was generated
    assert len(index_links_dict["validations_links"]) == 2

    # verify that the validation result HTML file rendered in the previous run was NOT updated
    ts_last_mod_1 = os.path.getmtime(validation_result_page_path)
    assert ts_last_mod_0 == ts_last_mod_1

    # verify the site builder method that returns the URL of the HTML file that renders a resource
    new_validation_result_page_path = os.path.join(
        site_builder.site_index_builder.target_store.store_backends[
            ValidationResultIdentifier].full_base_directory,
        "validations",
        expectation_suite_path_component,
        run_id,
        batch.batch_id + ".html")
    html_url = site_builder.get_resource_url(resource_identifier=validation_result_id)
    assert "file://" + new_validation_result_page_path == html_url

    html_url = site_builder.get_resource_url()
    assert "file://" + os.path.join(
        site_builder.site_index_builder.target_store.store_backends[
            ValidationResultIdentifier].full_base_directory,
        "index.html") == html_url
def test_subdir_reader_path_partitioning(tmp_path_factory):
    base_directory = str(tmp_path_factory.mktemp("test_folder_connection_path"))
    mock_files = [
        "asset_1/20190101__asset_1.csv",
        "asset_1/20190102__asset_1.csv",
        "asset_1/20190103__asset_1.csv",
        "asset_2/20190101__asset_2.csv",
        "asset_2/20190102__asset_2.csv",
    ]
    for file in mock_files:
        safe_mmkdir(os.path.join(base_directory, file.split("/")[0]))
        open(os.path.join(base_directory, file), "w").close()

    subdir_reader_generator = SubdirReaderGenerator("test_generator", base_directory=base_directory)

    # We should see two assets
    known_assets = subdir_reader_generator.get_available_data_asset_names()
    # Use set in test to avoid order issues
    assert set(known_assets) == {"asset_1", "asset_2"}

    # We should see three partitions for the first:
    known_partitions = subdir_reader_generator.get_available_partition_ids("asset_1")
    assert set(known_partitions) == {
        "20190101__asset_1",
        "20190102__asset_1",
        "20190103__asset_1",
    }

    asset_1_kwargs = [kwargs for kwargs in subdir_reader_generator.get_iterator("asset_1")]
    asset_2_kwargs = [kwargs for kwargs in subdir_reader_generator.get_iterator("asset_2")]
    with pytest.raises(BatchKwargsError):
        not_an_asset_kwargs = [kwargs for kwargs in subdir_reader_generator.get_iterator("not_an_asset")]

    assert len(asset_1_kwargs) == 3
    paths = [kwargs["path"] for kwargs in asset_1_kwargs]
    assert set(paths) == {
        os.path.join(base_directory, "asset_1/20190101__asset_1.csv"),
        os.path.join(base_directory, "asset_1/20190102__asset_1.csv"),
        os.path.join(base_directory, "asset_1/20190103__asset_1.csv"),
    }
    partitions = [kwargs["partition_id"] for kwargs in asset_1_kwargs]
    # SubdirReaderGenerator uses filenames from subdirectories to generate partition names
    assert set(partitions) == {
        "20190101__asset_1",
        "20190102__asset_1",
        "20190103__asset_1",
    }
    assert len(asset_1_kwargs[0].keys()) == 3

    assert len(asset_2_kwargs) == 2
    paths = [kwargs["path"] for kwargs in asset_2_kwargs]
    assert set(paths) == {
        os.path.join(base_directory, "asset_2/20190101__asset_2.csv"),
        os.path.join(base_directory, "asset_2/20190102__asset_2.csv"),
    }
    partitions = [kwargs["partition_id"] for kwargs in asset_2_kwargs]
    assert set(partitions) == {"20190101__asset_2", "20190102__asset_2"}
    assert len(asset_2_kwargs[0].keys()) == 3
def test_render_full_static_site(tmp_path_factory, filesystem_csv_3):
    project_dir = str(tmp_path_factory.mktemp("project_dir"))

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))

    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    scaffold_directories_and_notebooks(ge_directory)
    context.add_datasource("titanic", "pandas",
                           base_directory=os.path.join(project_dir, "data/titanic/"))
    context.add_datasource("random", "pandas",
                           base_directory=os.path.join(project_dir, "data/random/"))

    context.profile_datasource("titanic")
    glob_str = os.path.join(
        ge_directory,
        "uncommitted/validations/*/titanic/default/Titanic/BasicDatasetProfiler.json")
    glob_result = glob(glob_str)
    os.mkdir(os.path.join(ge_directory, "fixtures/validations"))
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/titanic"))
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/titanic/default"))
    full_fixture_path = os.path.join(ge_directory, "fixtures/validations/titanic/default/Titanic/")
    os.mkdir(full_fixture_path)
    shutil.copy(glob_result[0], full_fixture_path + "BasicDatasetProfiler.json")

    context.profile_datasource("random")
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/random"))
    os.mkdir(os.path.join(ge_directory, "fixtures/validations/random/default"))
    glob_str = os.path.join(
        ge_directory,
        "uncommitted/validations/*/random/default/f*/BasicDatasetProfiler.json")
    glob_result = glob(glob_str)

    full_fixture_path = os.path.join(ge_directory, "fixtures/validations/random/default/f1/")
    os.mkdir(full_fixture_path)
    shutil.copy(
        glob_result[0],  # !!! This might switch the f1 and f2 files...
        full_fixture_path + "BasicDatasetProfiler.json")

    full_fixture_path = os.path.join(ge_directory, "fixtures/validations/random/default/f2/")
    os.mkdir(full_fixture_path)
    shutil.copy(
        glob_result[1],  # !!! This might switch the f1 and f2 files...
        full_fixture_path + "BasicDatasetProfiler.json")

    context.render_full_static_site()

    # Titanic
    assert os.path.exists(os.path.join(
        ge_directory, "fixtures/validations/titanic/default/Titanic/BasicDatasetProfiler.json"))
    assert os.path.exists(os.path.join(
        ge_directory, "uncommitted/documentation/titanic/default/Titanic/BasicDatasetProfiler.html"))

    with open(os.path.join(
            ge_directory,
            "fixtures/validations/titanic/default/Titanic/BasicDatasetProfiler.json"), "r") as infile:
        titanic_validation = json.load(infile)
    titanic_run_id = titanic_validation['meta']['run_id']
    titanic_validation_html_filename = "{run_id}-BasicDatasetProfiler.html".format(
        run_id=titanic_run_id.replace(':', ''))
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/documentation/titanic/default/Titanic/{filename}".format(
            filename=titanic_validation_html_filename)))

    # f1
    assert os.path.exists(os.path.join(
        ge_directory, "fixtures/validations/random/default/f1/BasicDatasetProfiler.json"))
    assert os.path.exists(os.path.join(
        ge_directory, "uncommitted/documentation/random/default/f1/BasicDatasetProfiler.html"))

    with open(os.path.join(
            ge_directory,
            "fixtures/validations/random/default/f1/BasicDatasetProfiler.json"), "r") as infile:
        f1_validation = json.load(infile)
    f1_run_id = f1_validation['meta']['run_id']
    f1_validation_html_filename = "{run_id}-BasicDatasetProfiler.html".format(
        run_id=f1_run_id.replace(':', ''))
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/documentation/random/default/f1/{filename}".format(
            filename=f1_validation_html_filename)))

    # f2
    assert os.path.exists(os.path.join(
        ge_directory, "fixtures/validations/random/default/f2/BasicDatasetProfiler.json"))
    assert os.path.exists(os.path.join(
        ge_directory, "uncommitted/documentation/random/default/f2/BasicDatasetProfiler.html"))

    with open(os.path.join(
            ge_directory,
            "fixtures/validations/random/default/f2/BasicDatasetProfiler.json"), "r") as infile:
        f2_validation = json.load(infile)
    f2_run_id = f2_validation['meta']['run_id']
    f2_validation_html_filename = "{run_id}-BasicDatasetProfiler.html".format(
        run_id=f2_run_id.replace(':', ''))
    assert os.path.exists(os.path.join(
        ge_directory,
        "uncommitted/documentation/random/default/f2/{filename}".format(
            filename=f2_validation_html_filename)))

    # full site
    assert os.path.exists(os.path.join(ge_directory, "uncommitted/documentation/index.html"))

    # save documentation locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/documentation")

    safe_mmkdir("./tests/data_context/output/documentation/titanic")
    try:
        shutil.copytree(
            os.path.join(ge_directory, "uncommitted/documentation/titanic/default"),
            "./tests/data_context/output/documentation/titanic/default")
    except FileExistsError:
        shutil.rmtree("./tests/data_context/output/documentation/titanic/default")
        shutil.copytree(
            os.path.join(ge_directory, "uncommitted/documentation/titanic/default"),
            "./tests/data_context/output/documentation/titanic/default")

    safe_mmkdir("./tests/data_context/output/documentation/random")
    try:
        shutil.copytree(
            os.path.join(ge_directory, "uncommitted/documentation/random/default"),
            "./tests/data_context/output/documentation/random/default")
    except FileExistsError:
        shutil.rmtree("./tests/data_context/output/documentation/random/default")
        shutil.copytree(
            os.path.join(ge_directory, "uncommitted/documentation/random/default"),
            "./tests/data_context/output/documentation/random/default")

    shutil.copy(os.path.join(ge_directory, "uncommitted/documentation/index.html"),
                "./tests/data_context/output/documentation")
def test_render_full_static_site_from_empty_project(tmp_path_factory, filesystem_csv_3):
    # TODO : Use a standard test fixture
    # TODO : Have that test fixture copy a directory, rather than building a new one from scratch
    base_dir = str(tmp_path_factory.mktemp("project_dir"))
    project_dir = os.path.join(base_dir, "project_path")
    os.mkdir(project_dir)

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy(file_relative_path(__file__, "../test_sets/Titanic.csv"),
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))

    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
"""

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    context.add_datasource("titanic",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           generators={
                               "subdir_reader": {
                                   "class_name": "SubdirReaderBatchKwargsGenerator",
                                   "base_directory": os.path.join(project_dir, "data/titanic/")
                               }
                           })
    context.add_datasource("random",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           generators={
                               "subdir_reader": {
                                   "class_name": "SubdirReaderBatchKwargsGenerator",
                                   "base_directory": os.path.join(project_dir, "data/random/")
                               }
                           })

    context.profile_datasource("titanic")

    # Replicate the batch id of the batch that will be profiled in order to generate the file path of the
    # validation result
    titanic_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/titanic/Titanic.csv'),
        'datasource': 'titanic'
    }).to_id()

    tree_str = gen_directory_tree_str(project_dir)
    assert tree_str == """project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
    great_expectations/
        .gitignore
        great_expectations.yml
        expectations/
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler.json
        notebooks/
            pandas/
                validation_playground.ipynb
            spark/
                validation_playground.ipynb
            sql/
                validation_playground.ipynb
        plugins/
            custom_data_docs/
                renderers/
                styles/
                    data_docs_custom_styles.css
                views/
        uncommitted/
            config_variables.yml
            data_docs/
            samples/
            validations/
                titanic/
                    subdir_reader/
                        Titanic/
                            BasicDatasetProfiler/
                                profiling/
                                    {}.json
""".format(titanic_profiled_batch_id)

    context.profile_datasource("random")
    context.build_data_docs()

    f1_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/random/f1.csv'),
        'datasource': 'random'
    }).to_id()
    f2_profiled_batch_id = PathBatchKwargs({
        'path': os.path.join(project_dir, 'data/random/f2.csv'),
        'datasource': 'random'
    }).to_id()

    data_docs_dir = os.path.join(project_dir, "great_expectations/uncommitted/data_docs")
    observed = gen_directory_tree_str(data_docs_dir)
    assert observed == """\
data_docs/
    local_site/
        index.html
        expectations/
            random/
                subdir_reader/
                    f1/
                        BasicDatasetProfiler.html
                    f2/
                        BasicDatasetProfiler.html
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler.html
        static/
            fonts/
                HKGrotesk/
                    HKGrotesk-Bold.otf
                    HKGrotesk-BoldItalic.otf
                    HKGrotesk-Italic.otf
                    HKGrotesk-Light.otf
                    HKGrotesk-LightItalic.otf
                    HKGrotesk-Medium.otf
                    HKGrotesk-MediumItalic.otf
                    HKGrotesk-Regular.otf
                    HKGrotesk-SemiBold.otf
                    HKGrotesk-SemiBoldItalic.otf
            images/
                favicon.ico
                glossary_scroller.gif
                iterative-dev-loop.png
                logo-long-vector.svg
                logo-long.png
                short-logo-vector.svg
                short-logo.png
                validation_failed_unexpected_values.gif
            styles/
                data_docs_custom_styles_template.css
                data_docs_default_styles.css
        validations/
            random/
                subdir_reader/
                    f1/
                        BasicDatasetProfiler/
                            profiling/
                                {0:s}.html
                    f2/
                        BasicDatasetProfiler/
                            profiling/
                                {1:s}.html
            titanic/
                subdir_reader/
                    Titanic/
                        BasicDatasetProfiler/
                            profiling/
                                {2:s}.html
""".format(f1_profiled_batch_id, f2_profiled_batch_id, titanic_profiled_batch_id)

    # save data_docs locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/data_docs")

    if os.path.isdir("./tests/data_context/output/data_docs"):
        shutil.rmtree("./tests/data_context/output/data_docs")
    shutil.copytree(os.path.join(ge_directory, "uncommitted/data_docs/"),
                    "./tests/data_context/output/data_docs")
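# gen_directory_tree_str renders a directory as the indented listing these tests
# compare against: one line per entry, four spaces per level, in sorted walk
# order with a directory's files listed before its subdirectories. A minimal
# sketch consistent with the expected strings (the actual test utility may
# differ in details):
def gen_directory_tree_str_sketch(startpath):
    output_str = ""
    for root, dirs, files in sorted(os.walk(startpath)):
        level = root.replace(startpath, "").count(os.sep)
        indent = " " * 4 * level
        output_str += "{}{}/\n".format(indent, os.path.basename(root))
        subindent = " " * 4 * (level + 1)
        for f in sorted(files):
            output_str += "{}{}\n".format(subindent, f)
    return output_str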
def test_render_full_static_site_from_empty_project(tmp_path_factory, filesystem_csv_3):
    # TODO : Use a standard test fixture
    # TODO : Have that test fixture copy a directory, rather than building a new one from scratch
    base_dir = str(tmp_path_factory.mktemp("project_dir"))
    project_dir = os.path.join(base_dir, "project_path")
    os.mkdir(project_dir)

    os.makedirs(os.path.join(project_dir, "data"))
    os.makedirs(os.path.join(project_dir, "data/titanic"))
    shutil.copy("./tests/test_sets/Titanic.csv",
                str(os.path.join(project_dir, "data/titanic/Titanic.csv")))

    os.makedirs(os.path.join(project_dir, "data/random"))
    shutil.copy(os.path.join(filesystem_csv_3, "f1.csv"),
                str(os.path.join(project_dir, "data/random/f1.csv")))
    shutil.copy(os.path.join(filesystem_csv_3, "f2.csv"),
                str(os.path.join(project_dir, "data/random/f2.csv")))

    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
"""

    context = DataContext.create(project_dir)
    ge_directory = os.path.join(project_dir, "great_expectations")
    context.add_datasource("titanic",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           base_directory=os.path.join(project_dir, "data/titanic/"))
    context.add_datasource("random",
                           module_name="great_expectations.datasource",
                           class_name="PandasDatasource",
                           base_directory=os.path.join(project_dir, "data/random/"))

    context.profile_datasource("titanic")
    assert gen_directory_tree_str(project_dir) == """\
project_path/
    data/
        random/
            f1.csv
            f2.csv
        titanic/
            Titanic.csv
    great_expectations/
        .gitignore
        great_expectations.yml
        datasources/
        expectations/
            titanic/
                default/
                    Titanic/
                        BasicDatasetProfiler.json
        notebooks/
            create_expectations.ipynb
            integrate_validation_into_pipeline.ipynb
        plugins/
        uncommitted/
            config_variables.yml
            data_docs/
            samples/
            validations/
                profiling/
                    titanic/
                        default/
                            Titanic/
                                BasicDatasetProfiler.json
"""

    context.profile_datasource("random")
    context.build_data_docs()

    data_docs_dir = os.path.join(project_dir, "great_expectations/uncommitted/data_docs")
    observed = gen_directory_tree_str(data_docs_dir)
    print(observed)
    assert observed == """\
data_docs/
    local_site/
        index.html
        expectations/
            random/
                default/
                    f1/
                        BasicDatasetProfiler.html
                    f2/
                        BasicDatasetProfiler.html
            titanic/
                default/
                    Titanic/
                        BasicDatasetProfiler.html
        validations/
            profiling/
                random/
                    default/
                        f1/
                            BasicDatasetProfiler.html
                        f2/
                            BasicDatasetProfiler.html
                titanic/
                    default/
                        Titanic/
                            BasicDatasetProfiler.html
"""

    # save data_docs locally
    safe_mmkdir("./tests/data_context/output")
    safe_mmkdir("./tests/data_context/output/data_docs")

    if os.path.isdir("./tests/data_context/output/data_docs"):
        shutil.rmtree("./tests/data_context/output/data_docs")
    shutil.copytree(os.path.join(ge_directory, "uncommitted/data_docs/"),
                    "./tests/data_context/output/data_docs")