def test_DataContextConfig_with_BaseStoreBackendDefaults_and_simple_defaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Ensure that a very simple DataContextConfig setup with many defaults is created accurately
    and produces a valid DataContextConfig
    """

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=BaseStoreBackendDefaults(),
    )

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
Example 2
    def __init__(self, conn, report):
        self.conn = conn
        self.report = report

        data_context_config = DataContextConfig(
            datasources={
                self.datasource_name: DatasourceConfig(
                    class_name="SqlAlchemyDatasource",
                    credentials={
                        # This isn't actually used since we pass the connection directly,
                        # but GE parses it to change some of its behavior so it's useful
                        # to emulate that here.
                        "url": self.conn.engine.url,
                    },
                )
            },
            store_backend_defaults=InMemoryStoreBackendDefaults(),
            anonymous_usage_statistics={
                "enabled": False,
                # "data_context_id": <not set>,
            },
        )

        with _properly_init_datasource(self.conn):
            self.data_context = BaseDataContext(
                project_config=data_context_config)
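
The `_properly_init_datasource` helper is not shown in this snippet. A minimal sketch of what such a context manager might look like, assuming its only job is to hand the live SQLAlchemy connection to the datasource (a hypothetical reconstruction, not the actual implementation):

import contextlib
from unittest import mock

@contextlib.contextmanager
def _properly_init_datasource(conn):
    # Hypothetical: patch SqlAlchemyDatasource to reuse the caller's live
    # connection/engine instead of building a new engine from the URL.
    with mock.patch(
        "great_expectations.datasource.sqlalchemy_datasource"
        ".SqlAlchemyDatasource.engine",
        new=conn.engine,
        create=True,
    ):
        yield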
Example 3
def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults_no_root_directory(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Ensure that a very simple DataContextConfig setup using FilesystemStoreBackendDefaults is created accurately
    This test does not set the optional root_directory parameter
    """

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=FilesystemStoreBackendDefaults(),
    )

    # Create desired config
    data_context_id = data_context_config.anonymous_usage_statistics.data_context_id
    desired_config = construct_data_context_config(
        data_context_id=data_context_id, datasources=default_pandas_datasource_config
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
Example 4
    def _ge_context(self) -> Iterator[GEContext]:
        with self.base_engine.connect() as conn:
            data_context = BaseDataContext(project_config=DataContextConfig(
                # The datasource will be added via add_datasource().
                datasources={},
                store_backend_defaults=InMemoryStoreBackendDefaults(),
                anonymous_usage_statistics={
                    "enabled": False,
                    # "data_context_id": <not set>,
                },
            ))

            datasource_name = f"{self._datasource_name_base}-{uuid.uuid4()}"
            datasource_config = DatasourceConfig(
                class_name="SqlAlchemyDatasource",
                credentials={
                    # This isn't actually used since we pass the connection directly,
                    # but GE parses it to change some of its behavior so it's useful
                    # to emulate that here.
                    "url": conn.engine.url,
                },
            )
            with _inject_connection_into_datasource(conn):
                # Using the add_datasource method ensures that the datasource is added to
                # GE-internal cache, which avoids problems when calling GE methods later on.
                assert data_context.add_datasource(
                    datasource_name,
                    initialize=True,
                    **dict(datasourceConfigSchema.dump(datasource_config)),
                )
            assert data_context.get_datasource(datasource_name)

            yield GEContext(data_context, datasource_name)
Example 5
def test_datasource_store_retrieve_by_name(
    datasource_name: str,
    datasource_config: DatasourceConfig,
    datasource_store_with_single_datasource: DatasourceStore,
) -> None:
    actual_config: DatasourceConfig = (
        datasource_store_with_single_datasource.retrieve_by_name(
            datasource_name=datasource_name))
    assert datasource_config.to_dict() == actual_config.to_dict()
Example 6
def test_SparkDatasource_config(default_spark_datasource_config):

    datasource_config = DatasourceConfig(
        class_name="SparkDFDatasource",
        batch_kwargs_generators={},
    )

    desired_config = default_spark_datasource_config["my_spark_datasource"]

    datasource_config_schema = DatasourceConfigSchema()
    assert datasource_config_schema.dump(datasource_config) == desired_config
Example 7
def test_datasource_store_update_raises_error_if_datasource_doesnt_exist(
    datasource_name: str,
    empty_datasource_store: DatasourceStore,
) -> None:
    updated_datasource_config: DatasourceConfig = DatasourceConfig()
    with pytest.raises(ValueError) as e:
        empty_datasource_store.update_by_name(
            datasource_name=datasource_name,
            datasource_config=updated_datasource_config)

    assert f"Unable to load datasource `{datasource_name}`" in str(e.value)
Example 8
def test_DataContextConfig_with_FilesystemStoreBackendDefaults_and_simple_defaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Ensure that a very simple DataContextConfig setup using FilesystemStoreBackendDefaults is created accurately
    This test sets the optional root_directory parameter.
    """

    test_root_directory = "test_root_dir"

    store_backend_defaults = FilesystemStoreBackendDefaults(
        root_directory=test_root_directory
    )
    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=store_backend_defaults,
    )

    # Create desired config
    data_context_id = data_context_config.anonymous_usage_statistics.data_context_id
    desired_config = construct_data_context_config(
        data_context_id=data_context_id, datasources=default_pandas_datasource_config
    )
    # Add root_directory to stores and data_docs
    desired_config["stores"][desired_config["expectations_store_name"]][
        "store_backend"
    ]["root_directory"] = test_root_directory
    desired_config["stores"][desired_config["validations_store_name"]]["store_backend"][
        "root_directory"
    ] = test_root_directory
    desired_config["stores"][desired_config["checkpoint_store_name"]]["store_backend"][
        "root_directory"
    ] = test_root_directory
    desired_config["data_docs_sites"]["local_site"]["store_backend"][
        "root_directory"
    ] = test_root_directory

    data_context_config_schema = DataContextConfigSchema()
    assert filter_properties_dict(
        properties=data_context_config_schema.dump(data_context_config)
    ) == filter_properties_dict(properties=desired_config)
    assert DataContext.validate_config(project_config=data_context_config)
Example 9
def test_datasource_store_retrieval(
        empty_datasource_store: DatasourceStore,
        datasource_config: DatasourceConfig) -> None:
    store: DatasourceStore = empty_datasource_store

    key: DataContextVariableKey = DataContextVariableKey(
        resource_type=DataContextVariableSchema.DATASOURCES,
        resource_name="my_datasource",
    )
    store.set(key=key, value=datasource_config)
    res: DatasourceConfig = store.get(key=key)

    assert isinstance(res, DatasourceConfig)
    assert res.to_json_dict() == datasource_config.to_json_dict()
Example 10
    def build_context(self):
        """
        Purpose:
            Create a DataContext and a datasource and add them to the object.
        Returns:
            Saves the DataContext and datasource to self.
        """
        self.context = ge.get_context()

        # Create datasource configuration
        datasource_config = {
            "name": "example_datasource",
            "class_name": "Datasource",
            "module_name": "great_expectations.datasource",
            "execution_engine": {
                "module_name": "great_expectations.execution_engine",
                "class_name": "PandasExecutionEngine",
            },
            "data_connectors": {
                "default_runtime_data_connector_name": {
                    "class_name": "RuntimeDataConnector",
                    "batch_identifiers": ["default_identifier_name"],
                },
            },
        }

        # Create data context configuration
        data_context_config = DataContextConfig(
            datasources={
                "pandas": DatasourceConfig(
                    class_name="Datasource",
                    execution_engine={
                        "class_name": "PandasExecutionEngine"
                    },
                    data_connectors={
                        "default_runtime_data_connector_name": {
                            "class_name": "RuntimeDataConnector",
                            "batch_identifiers": ["default_identifier_name"],
                        }
                    },
                )
            },
            store_backend_defaults=FilesystemStoreBackendDefaults(
                root_directory=os.path.join(os.getcwd(), "great_expectations")
            ),
        )

        # Build context and add data source
        self.context = BaseDataContext(project_config=data_context_config)
        # self.context.test_yaml_config(yaml.dump(datasource_config))
        self.context.add_datasource(**datasource_config)
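
After build_context() runs, the RuntimeDataConnector configured above can validate an in-memory DataFrame. A minimal usage sketch (the asset, suite, and column names are hypothetical):

import pandas as pd
from great_expectations.core.batch import RuntimeBatchRequest

# `context` below stands for the BaseDataContext built by build_context() above.
df = pd.DataFrame({"col_a": [1, 2, 3]})
batch_request = RuntimeBatchRequest(
    datasource_name="example_datasource",
    data_connector_name="default_runtime_data_connector_name",
    data_asset_name="my_asset",  # hypothetical asset name
    runtime_parameters={"batch_data": df},  # pass the DataFrame directly
    batch_identifiers={"default_identifier_name": "default"},
)
validator = context.get_validator(
    batch_request=batch_request,
    create_expectation_suite_with_name="my_suite",  # hypothetical suite name
)
validator.expect_column_values_to_not_be_null("col_a")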
Example 11
def test_datasource_store_retrieval_cloud_mode(
    datasource_config: DatasourceConfig,
    ge_cloud_base_url: str,
    ge_cloud_access_token: str,
    ge_cloud_organization_id: str,
) -> None:
    ge_cloud_store_backend_config: dict = {
        "class_name": "GeCloudStoreBackend",
        "ge_cloud_base_url": ge_cloud_base_url,
        "ge_cloud_resource_type": "datasource",
        "ge_cloud_credentials": {
            "access_token": ge_cloud_access_token,
            "organization_id": ge_cloud_organization_id,
        },
        "suppress_store_backend_id": True,
    }

    store: DatasourceStore = DatasourceStore(
        store_name="my_cloud_datasource_store",
        store_backend=ge_cloud_store_backend_config,
    )

    key: GeCloudIdentifier = GeCloudIdentifier(resource_type="datasource",
                                               ge_cloud_id="foobarbaz")

    with patch("requests.patch", autospec=True) as mock_patch:
        type(mock_patch.return_value).status_code = PropertyMock(
            return_value=200)

        store.set(key=key, value=datasource_config)

        mock_patch.assert_called_with(
            "https://app.test.greatexpectations.io/organizations/bd20fead-2c31-4392-bcd1-f1e87ad5a79c/datasources/foobarbaz",
            json={
                "data": {
                    "type": "datasource",
                    "id": "foobarbaz",
                    "attributes": {
                        "datasource_config": datasource_config.to_dict(),
                        "organization_id": ge_cloud_organization_id,
                    },
                }
            },
            headers={
                "Content-Type": "application/vnd.api+json",
                "Authorization": "Bearer 6bb5b6f5c7794892a4ca168c65c2603e",
            },
        )
Example 12
def test_PandasDatasource_config(default_pandas_datasource_config):

    datasource_config = DatasourceConfig(
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": "../data/",
            }
        },
    )

    desired_config = default_pandas_datasource_config["my_pandas_datasource"]

    datasource_config_schema = DatasourceConfigSchema()
    assert datasource_config_schema.dump(datasource_config) == desired_config
Example 13
    def _get_ge_context_local(ge_project_path: str) -> BaseDataContext:
        """
        This is configured to work with an in-memory pandas DataFrame.
        This setup allows us to run validations before (perhaps unnecessarily) writing any data
        to disk, as well as at any other stage.

        Currently using local storage.

        Args:
            ge_project_path (str): The path to the Great Expectations project,
                e.g. `/home/viadot/my_flow`. Expectation suites need to be placed inside
                the `expectations` folder, e.g. `/home/viadot/my_flow/expectations/failure.json`.

        Returns:
            BaseDataContext: The GE context (i.e. config) required to run the validations.
        """
        data_context_config = DataContextConfig(
            datasources={
                "pandas": DatasourceConfig(
                    class_name="PandasDatasource",
                    batch_kwargs_generators={},  # override the CSV default
                )
            },
            store_backend_defaults=FilesystemStoreBackendDefaults(ge_project_path),
            validation_operators={
                "action_list_operator": {
                    "class_name": "ActionListValidationOperator",
                    "action_list": [
                        {
                            "name": "store_validation_result",
                            "action": {"class_name": "StoreValidationResultAction"},
                        },
                        {
                            "name": "store_evaluation_params",
                            "action": {"class_name": "StoreEvaluationParametersAction"},
                        },
                        {
                            "name": "update_data_docs",
                            "action": {"class_name": "UpdateDataDocsAction"},
                        },
                    ],
                }
            },
        )
        context = BaseDataContext(project_config=data_context_config)
        return context
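
A hedged usage sketch for the context returned above, exercising the V2 batch-kwargs API (the DataFrame is illustrative; the suite name matches the docstring's `failure.json` example):

import pandas as pd

context = _get_ge_context_local("/home/viadot/my_flow")  # path from the docstring
df = pd.DataFrame({"sales": [100, 200, 300]})
# In-memory pandas data is passed via the "dataset" batch kwarg.
batch = context.get_batch(
    {"datasource": "pandas", "dataset": df}, expectation_suite_name="failure"
)
results = context.run_validation_operator(
    "action_list_operator", assets_to_validate=[batch]
)
assert results.success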
Example 14
def test_SqlAlchemyDatasource_config(default_sql_alchemy_datasource_config):

    datasource_config = DatasourceConfig(
        class_name="SqlAlchemyDatasource",
        credentials={
            "drivername": "custom_drivername",
            "host": "custom_host",
            "port": "custom_port",
            "username": "******",
            "password": "******",
            "database": "custom_database",
        },
    )

    desired_config = default_sql_alchemy_datasource_config[
        "my_sql_alchemy_datasource"]

    datasource_config_schema = DatasourceConfigSchema()
    assert datasource_config_schema.dump(datasource_config) == desired_config
Example 15
def test_datasource_store_with_inline_store_backend(
        datasource_config: DatasourceConfig,
        empty_data_context: DataContext) -> None:
    inline_store_backend_config: dict = {
        "class_name": "InlineStoreBackend",
        "data_context": empty_data_context,
        "suppress_store_backend_id": True,
    }

    store: DatasourceStore = DatasourceStore(
        store_name="my_datasource_store",
        store_backend=inline_store_backend_config,
    )

    key: DataContextVariableKey = DataContextVariableKey(
        resource_type=DataContextVariableSchema.DATASOURCES,
        resource_name="my_datasource",
    )

    store.set(key=key, value=datasource_config)
    res: DatasourceConfig = store.get(key=key)

    assert isinstance(res, DatasourceConfig)
    assert res.to_json_dict() == datasource_config.to_json_dict()
Example 16
def datasource_config() -> DatasourceConfig:
    return DatasourceConfig(
        class_name="Datasource",
        execution_engine={
            "class_name": "PandasExecutionEngine",
            "module_name": "great_expectations.execution_engine",
        },
        data_connectors={
            "tripdata_monthly_configured": {
                "class_name": "ConfiguredAssetFilesystemDataConnector",
                "module_name": "great_expectations.datasource.data_connector",
                "base_directory": "/path/to/trip_data",
                "assets": {
                    "yellow": {
                        "class_name": "Asset",
                        "module_name":
                        "great_expectations.datasource.data_connector.asset",
                        "pattern": r"yellow_tripdata_(\d{4})-(\d{2})\.csv$",
                        "group_names": ["year", "month"],
                    }
                },
            }
        },
    )
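
As a quick sanity check, the asset `pattern` above yields capture groups that line up with `group_names`:

import re

match = re.match(
    r"yellow_tripdata_(\d{4})-(\d{2})\.csv$", "yellow_tripdata_2019-01.csv"
)
assert match is not None
assert match.groups() == ("2019", "01")  # maps to ["year", "month"]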
Example 17
def test_DataContextConfig_with_GCSStoreBackendDefaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Make sure that using GCSStoreBackendDefaults as the store_backend_defaults applies appropriate
    defaults, including default_bucket_name & default_project_name getting propagated
    to all stores.
    """

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                module_name="great_expectations.datasource",
                data_asset_type={
                    "module_name": "great_expectations.dataset",
                    "class_name": "PandasDataset",
                },
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=GCSStoreBackendDefaults(
            default_bucket_name="my_default_bucket",
            default_project_name="my_default_project",
        ),
    )

    # Create desired config
    data_context_id = data_context_config.anonymous_usage_statistics.data_context_id
    desired_stores_config = {
        "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_GCS_store": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "my_default_bucket",
                "project": "my_default_project",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "expectations",
            },
        },
        "validations_GCS_store": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "my_default_bucket",
                "project": "my_default_project",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "validations",
            },
        },
    }
    desired_data_docs_sites_config = {
        "gcs_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "bucket": "my_default_bucket",
                "project": "my_default_project",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "data_docs",
            },
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="expectations_GCS_store",
        validations_store_name="validations_GCS_store",
        evaluation_parameter_store_name=DataContextConfigDefaults.DEFAULT_EVALUATION_PARAMETER_STORE_NAME.value,
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
Example 18
def test_DataContextConfig_with_S3StoreBackendDefaults_and_simple_defaults_with_variable_sub(
    monkeypatch, construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Ensure that a DataContextConfig using S3StoreBackendDefaults, with an environment
    variable substitution in the datasource config, is created accurately and produces
    a valid DataContextConfig
    """

    monkeypatch.setenv("SUBSTITUTED_BASE_DIRECTORY", "../data/")

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "${SUBSTITUTED_BASE_DIRECTORY}",
                    }
                },
            )
        },
        store_backend_defaults=S3StoreBackendDefaults(
            default_bucket_name="my_default_bucket"
        ),
    )

    # Create desired config
    desired_stores_config = {
        "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_S3_store": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "my_default_bucket",
                "class_name": "TupleS3StoreBackend",
                "prefix": "expectations",
            },
        },
        "validations_S3_store": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "my_default_bucket",
                "class_name": "TupleS3StoreBackend",
                "prefix": "validations",
            },
        },
    }
    desired_data_docs_sites_config = {
        "s3_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "bucket": "my_default_bucket",
                "class_name": "TupleS3StoreBackend",
                "prefix": "data_docs",
            },
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="expectations_S3_store",
        validations_store_name="validations_S3_store",
        evaluation_parameter_store_name=DataContextConfigDefaults.DEFAULT_EVALUATION_PARAMETER_STORE_NAME.value,
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )

    desired_config["datasources"]["my_pandas_datasource"]["batch_kwargs_generators"][
        "subdir_reader"
    ]["base_directory"] = "${SUBSTITUTED_BASE_DIRECTORY}"

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)

    data_context = BaseDataContext(project_config=data_context_config)
    assert (
        data_context.datasources["my_pandas_datasource"]
        .get_batch_kwargs_generator("subdir_reader")
        ._base_directory
        == "../data/"
    )
Example 19
def test_create_data_context_and_config_vars_in_code(tmp_path_factory,
                                                     monkeypatch):
    """
    What does this test and why?
    Creating a DataContext via .create(), then using .save_config_variable() to save a variable that will eventually be substituted (e.g. ${SOME_VAR}) should result in the proper escaping of $.
    This is in response to issue #2196
    """

    project_path = str(tmp_path_factory.mktemp("data_context"))
    context = ge.DataContext.create(
        project_root_dir=project_path,
        usage_statistics_enabled=False,
    )

    CONFIG_VARS = {
        "DB_HOST": "${DB_HOST_FROM_ENV_VAR}",
        "DB_NAME": "DB_NAME",
        "DB_USER": "******",
        "DB_PWD": "pas$word",
    }
    for k, v in CONFIG_VARS.items():
        context.save_config_variable(k, v)

    config_vars_file_contents = context._load_config_variables_file()

    # Add escaping for DB_PWD since it is not of the form ${SOMEVAR} or $SOMEVAR
    CONFIG_VARS_WITH_ESCAPING = CONFIG_VARS.copy()
    CONFIG_VARS_WITH_ESCAPING["DB_PWD"] = r"pas\$word"

    # Ensure all config vars saved are in the config_variables.yml file
    # and that escaping was added for "pas$word" -> "pas\$word"
    assert all(item in config_vars_file_contents.items()
               for item in CONFIG_VARS_WITH_ESCAPING.items())
    assert not all(item in config_vars_file_contents.items()
                   for item in CONFIG_VARS.items())

    # Add env var for substitution
    monkeypatch.setenv("DB_HOST_FROM_ENV_VAR", "DB_HOST_FROM_ENV_VAR_VALUE")

    datasource_config = DatasourceConfig(
        class_name="SqlAlchemyDatasource",
        credentials={
            "drivername": "postgresql",
            "host": "$DB_HOST",
            "port": "65432",
            "database": "${DB_NAME}",
            "username": "******",
            "password": "******",
        },
    )
    datasource_config_schema = DatasourceConfigSchema()

    # use context.add_datasource to test this by adding a datasource with values to substitute.
    context.add_datasource(initialize=False,
                           name="test_datasource",
                           **datasource_config_schema.dump(datasource_config))

    assert context.list_datasources()[0]["credentials"] == {
        "drivername": "postgresql",
        "host": "DB_HOST_FROM_ENV_VAR_VALUE",
        "port": "65432",
        "database": "DB_NAME",
        "username": "******",
        # Note masking of "password" field
        "password": "******",
    }

    # Check context substitutes escaped variables appropriately
    data_context_config_schema = DataContextConfigSchema()
    context_with_variables_substituted_dict = data_context_config_schema.dump(
        context.get_config_with_variables_substituted())

    test_datasource_credentials = context_with_variables_substituted_dict[
        "datasources"]["test_datasource"]["credentials"]

    assert test_datasource_credentials["host"] == "DB_HOST_FROM_ENV_VAR_VALUE"
    assert test_datasource_credentials["username"] == "DB_USER"
    assert test_datasource_credentials["password"] == "pas$word"
    assert test_datasource_credentials["database"] == "DB_NAME"

    # Ensure skip_if_substitution_variable=False works as documented
    context.save_config_variable("escaped",
                                 "$SOME_VAR",
                                 skip_if_substitution_variable=False)
    context.save_config_variable("escaped_curly",
                                 "${SOME_VAR}",
                                 skip_if_substitution_variable=False)

    config_vars_file_contents = context._load_config_variables_file()

    assert config_vars_file_contents["escaped"] == r"\$SOME_VAR"
    assert config_vars_file_contents["escaped_curly"] == r"\${SOME_VAR}"
Example 20
def test_DataContextConfig_with_GCSStoreBackendDefaults_using_all_parameters(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Make sure that GCSStoreBackendDefaults parameters are handled appropriately
    E.g. Make sure that default_bucket_name is ignored if individual bucket names are passed
    """

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                module_name="great_expectations.datasource",
                data_asset_type={
                    "module_name": "great_expectations.dataset",
                    "class_name": "PandasDataset",
                },
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=GCSStoreBackendDefaults(
            default_bucket_name="custom_default_bucket_name",
            default_project_name="custom_default_project_name",
            expectations_store_bucket_name="custom_expectations_store_bucket_name",
            validations_store_bucket_name="custom_validations_store_bucket_name",
            data_docs_bucket_name="custom_data_docs_store_bucket_name",
            expectations_store_project_name="custom_expectations_store_project_name",
            validations_store_project_name="custom_validations_store_project_name",
            data_docs_project_name="custom_data_docs_store_project_name",
            expectations_store_prefix="custom_expectations_store_prefix",
            validations_store_prefix="custom_validations_store_prefix",
            data_docs_prefix="custom_data_docs_prefix",
            expectations_store_name="custom_expectations_GCS_store_name",
            validations_store_name="custom_validations_GCS_store_name",
            evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        ),
    )

    # Create desired config
    desired_stores_config = {
        "custom_evaluation_parameter_store_name": {
            "class_name": "EvaluationParameterStore"
        },
        "custom_expectations_GCS_store_name": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "custom_expectations_store_bucket_name",
                "project": "custom_expectations_store_project_name",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "custom_expectations_store_prefix",
            },
        },
        "custom_validations_GCS_store_name": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "custom_validations_store_bucket_name",
                "project": "custom_validations_store_project_name",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "custom_validations_store_prefix",
            },
        },
    }
    desired_data_docs_sites_config = {
        "gcs_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "bucket": "custom_data_docs_store_bucket_name",
                "project": "custom_data_docs_store_project_name",
                "class_name": "TupleGCSStoreBackend",
                "prefix": "custom_data_docs_prefix",
            },
        }
    }
    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="custom_expectations_GCS_store_name",
        validations_store_name="custom_validations_GCS_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
Example 21
def test_DataContextConfig_with_DatabaseStoreBackendDefaults_using_all_parameters(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Make sure that DatabaseStoreBackendDefaults parameters are handled appropriately
    E.g. Make sure that default_credentials is ignored if individual store credentials are passed
    """

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                module_name="great_expectations.datasource",
                data_asset_type={
                    "module_name": "great_expectations.dataset",
                    "class_name": "PandasDataset",
                },
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=DatabaseStoreBackendDefaults(
            default_credentials={
                "drivername": "postgresql",
                "host": "localhost",
                "port": "65432",
                "username": "******",
                "password": "******",
                "database": "ge_tutorials",
            },
            expectations_store_credentials={
                "drivername": "custom_expectations_store_drivername",
                "host": "custom_expectations_store_host",
                "port": "custom_expectations_store_port",
                "username": "******",
                "password": "******",
                "database": "custom_expectations_store_database",
            },
            validations_store_credentials={
                "drivername": "custom_validations_store_drivername",
                "host": "custom_validations_store_host",
                "port": "custom_validations_store_port",
                "username": "******",
                "password": "******",
                "database": "custom_validations_store_database",
            },
            expectations_store_name="custom_expectations_database_store_name",
            validations_store_name="custom_validations_database_store_name",
            evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        ),
    )

    # Create desired config
    desired_stores_config = {
        "custom_evaluation_parameter_store_name": {
            "class_name": "EvaluationParameterStore"
        },
        "custom_expectations_database_store_name": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                "credentials": {
                    "database": "custom_expectations_store_database",
                    "drivername": "custom_expectations_store_drivername",
                    "host": "custom_expectations_store_host",
                    "password": "******",
                    "port": "custom_expectations_store_port",
                    "username": "******",
                },
            },
        },
        "custom_validations_database_store_name": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                "credentials": {
                    "database": "custom_validations_store_database",
                    "drivername": "custom_validations_store_drivername",
                    "host": "custom_validations_store_host",
                    "password": "******",
                    "port": "custom_validations_store_port",
                    "username": "******",
                },
            },
        },
    }
    desired_data_docs_sites_config = {
        "local_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="custom_expectations_database_store_name",
        validations_store_name="custom_validations_database_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
Example 22
def test_DataContextConfig_with_DatabaseStoreBackendDefaults(
    construct_data_context_config, default_pandas_datasource_config
):
    """
    What does this test and why?
    Make sure that using DatabaseStoreBackendDefaults as the store_backend_defaults applies appropriate
    defaults, including default_credentials being propagated to the stores but not to data_docs
    """

    data_context_config = DataContextConfig(
        datasources={
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                module_name="great_expectations.datasource",
                data_asset_type={
                    "module_name": "great_expectations.dataset",
                    "class_name": "PandasDataset",
                },
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            )
        },
        store_backend_defaults=DatabaseStoreBackendDefaults(
            default_credentials={
                "drivername": "postgresql",
                "host": "localhost",
                "port": "65432",
                "username": "******",
                "password": "******",
                "database": "ge_tutorials",
            },
        ),
    )

    # Create desired config
    desired_stores_config = {
        "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_database_store": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                "credentials": {
                    "drivername": "postgresql",
                    "host": "localhost",
                    "port": "65432",
                    "username": "******",
                    "password": "******",
                    "database": "ge_tutorials",
                },
            },
        },
        "validations_database_store": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "class_name": "DatabaseStoreBackend",
                "credentials": {
                    "drivername": "postgresql",
                    "host": "localhost",
                    "port": "65432",
                    "username": "******",
                    "password": "******",
                    "database": "ge_tutorials",
                },
            },
        },
    }
    desired_data_docs_sites_config = {
        "local_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources=default_pandas_datasource_config,
        expectations_store_name="expectations_database_store",
        validations_store_name="validations_database_store",
        evaluation_parameter_store_name=DataContextConfigDefaults.DEFAULT_EVALUATION_PARAMETER_STORE_NAME.value,
        stores=desired_stores_config,
        data_docs_sites=desired_data_docs_sites_config,
    )

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
Example 23
# Get yesterday's date, in the correct format
yesterday_date = '{{ yesterday_ds_nodash }}'

# Define Great Expectations file paths
data_dir = '/usr/local/airflow/include/data/'
data_file = '/usr/local/airflow/include/data/or_20201208.csv'
ge_root_dir = '/usr/local/airflow/include/great_expectations'

# Define Great Expectations contexts
data_context_config = DataContextConfig(
    datasources={
        "data__dir": DatasourceConfig(
            class_name="PandasDatasource",
            batch_kwargs_generators={
                "subdir_reader": {
                    "class_name": "SubdirReaderBatchKwargsGenerator",
                    "base_directory": data_dir,
                }
            },
        )
    },
    store_backend_defaults=FilesystemStoreBackendDefaults(root_directory=ge_root_dir)
)
data_context = BaseDataContext(project_config=data_context_config, context_root_dir=ge_root_dir)
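
For illustration, a hedged sketch of exercising this context directly through the V2 batch-kwargs API (the suite and column names are hypothetical):

data_context.create_expectation_suite("my_suite", overwrite_existing=True)
batch = data_context.get_batch(
    {"path": data_file, "datasource": "data__dir"}, expectation_suite_name="my_suite"
)
batch.expect_column_to_exist("order_id")  # hypothetical column name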


def run_adf_pipeline(pipeline_name, date):
    """Runs an Azure Data Factory pipeline using the AzureDataFactoryHook and passes in a date parameter."""

    # Create a dictionary with the date parameter
Example 24
def test_override_general_defaults(
    construct_data_context_config,
    default_pandas_datasource_config,
    default_spark_datasource_config,
):
    """
    What does this test and why?
    A DataContextConfig should be able to be created by passing items into the constructor that override any defaults.
    It should also be able to handle multiple datasources, even if they are configured with a dictionary or a DatasourceConfig.
    """

    data_context_config = DataContextConfig(
        config_version=999,
        plugins_directory="custom_plugins_directory",
        config_variables_file_path="custom_config_variables_file_path",
        datasources={
            "my_spark_datasource": {
                "data_asset_type": {
                    "class_name": "SparkDFDataset",
                    "module_name": "great_expectations.dataset",
                },
                "class_name": "SparkDFDatasource",
                "module_name": "great_expectations.datasource",
                "batch_kwargs_generators": {},
            },
            "my_pandas_datasource": DatasourceConfig(
                class_name="PandasDatasource",
                batch_kwargs_generators={
                    "subdir_reader": {
                        "class_name": "SubdirReaderBatchKwargsGenerator",
                        "base_directory": "../data/",
                    }
                },
            ),
        },
        stores={
            "expectations_S3_store": {
                "class_name": "ExpectationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "expectations_S3_store2": {
                "class_name": "ExpectationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "validations_S3_store": {
                "class_name": "ValidationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "validations_S3_store2": {
                "class_name": "ValidationsStore",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                    "prefix": "REPLACE_ME",
                },
            },
            "custom_evaluation_parameter_store": {
                "class_name": "EvaluationParameterStore"
            },
        },
        expectations_store_name="custom_expectations_store_name",
        validations_store_name="custom_validations_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        data_docs_sites={
            "s3_site": {
                "class_name": "SiteBuilder",
                "store_backend": {
                    "class_name": "TupleS3StoreBackend",
                    "bucket": "REPLACE_ME",
                },
                "site_index_builder": {
                    "class_name": "DefaultSiteIndexBuilder",
                    "show_cta_footer": True,
                },
            },
            "local_site": {
                "class_name": "SiteBuilder",
                "show_how_to_buttons": True,
                "site_index_builder": {
                    "class_name": "DefaultSiteIndexBuilder",
                    "show_cta_footer": True,
                },
                "store_backend": {
                    "base_directory": "uncommitted/data_docs/local_site/",
                    "class_name": "TupleFilesystemStoreBackend",
                },
            },
        },
        validation_operators={
            "custom_action_list_operator": {
                "class_name": "ActionListValidationOperator",
                "action_list": [
                    {
                        "name": "custom_store_validation_result",
                        "action": {"class_name": "CustomStoreValidationResultAction"},
                    },
                    {
                        "name": "store_evaluation_params",
                        "action": {"class_name": "StoreEvaluationParametersAction"},
                    },
                    {
                        "name": "update_data_docs",
                        "action": {"class_name": "UpdateDataDocsAction"},
                    },
                ],
            }
        },
        anonymous_usage_statistics={"enabled": True},
    )

    desired_stores = {
        "custom_evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},
        "expectations_S3_store": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
        "expectations_S3_store2": {
            "class_name": "ExpectationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
        "validations_S3_store": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
        "validations_S3_store2": {
            "class_name": "ValidationsStore",
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
                "prefix": "REPLACE_ME",
            },
        },
    }

    desired_data_docs_sites_config = {
        "local_site": {
            "class_name": "SiteBuilder",
            "show_how_to_buttons": True,
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "base_directory": "uncommitted/data_docs/local_site/",
                "class_name": "TupleFilesystemStoreBackend",
            },
        },
        "s3_site": {
            "class_name": "SiteBuilder",
            "site_index_builder": {
                "class_name": "DefaultSiteIndexBuilder",
                "show_cta_footer": True,
            },
            "store_backend": {
                "bucket": "REPLACE_ME",
                "class_name": "TupleS3StoreBackend",
            },
        },
    }
    desired_validation_operators = {
        "custom_action_list_operator": {
            "class_name": "ActionListValidationOperator",
            "action_list": [
                {
                    "name": "custom_store_validation_result",
                    "action": {"class_name": "CustomStoreValidationResultAction"},
                },
                {
                    "name": "store_evaluation_params",
                    "action": {"class_name": "StoreEvaluationParametersAction"},
                },
                {
                    "name": "update_data_docs",
                    "action": {"class_name": "UpdateDataDocsAction"},
                },
            ],
        }
    }

    desired_config = construct_data_context_config(
        data_context_id=data_context_config.anonymous_usage_statistics.data_context_id,
        datasources={
            **default_pandas_datasource_config,
            **default_spark_datasource_config,
        },
        config_version=999.0,
        expectations_store_name="custom_expectations_store_name",
        validations_store_name="custom_validations_store_name",
        evaluation_parameter_store_name="custom_evaluation_parameter_store_name",
        stores=desired_stores,
        validation_operators=desired_validation_operators,
        data_docs_sites=desired_data_docs_sites_config,
        plugins_directory="custom_plugins_directory",
    )
    desired_config["config_variables_file_path"] = "custom_config_variables_file_path"

    data_context_config_schema = DataContextConfigSchema()
    assert data_context_config_schema.dump(data_context_config) == desired_config
    assert DataContext.validate_config(project_config=data_context_config)
Example 25
GE was installed inside a zip file (which is a location allowed by PEP 273).

Therefore, this test is intended to be run after installing GE inside a zip file and
then setting the appropriate PYTHONPATH env variable. If desired, this test can also be
run after installing GE in a normal filesystem location (i.e. a directory).

This test is OK if it finishes without raising an exception.

To make it easier to debug this test, it prints:
* The location of the GE library: to verify that we are testing the library that we want
* The version of the GE library: idem
* data_docs url: If everything works, this will be a url (e.g. starting with file://...)


Additional info: https://github.com/great-expectations/great_expectations/issues/3772 and
https://www.python.org/dev/peps/pep-0273/
"""

print(f"Great Expectations location: {ge.__file__}")
print(f"Great Expectations version: {ge.__version__}")

data_context_config = DataContextConfig(
    datasources={
        "example_datasource": DatasourceConfig(class_name="PandasDatasource")
    },
    store_backend_defaults=FilesystemStoreBackendDefaults(
        root_directory=os.path.join(tempfile.mkdtemp(), "my_greatexp_workdir")
    ),
)
context = BaseDataContext(project_config=data_context_config)
print(f"Great Expectations data_docs url: {context.build_data_docs()}")