import os
import sys
from pathlib import Path
from typing import Any, Optional

import pytest

from great_expectations.core.yaml_handler import YAMLHandler


def test_file_output(tmp_path: Path, yaml_handler: YAMLHandler) -> None:
    simplest_yaml: str = "abc: 1"
    test_file: str = os.path.join(tmp_path, "out.yaml")
    out: Path = Path(test_file)

    data: dict = yaml_handler.load(simplest_yaml)
    yaml_handler.dump(data, out)

    # check the output
    with open(test_file) as f:
        line = f.readline().strip()
        data_from_file: dict = yaml_handler.load(line)

    assert data_from_file == data


def test_dump_default_behavior_with_no_stream_specified(
    yaml_handler: YAMLHandler,
) -> None:
    # When no stream is specified, StringIO is used by default.
    simplest_dict: dict = dict(abc=1)
    dumped: Optional[str] = yaml_handler.dump(simplest_dict)
    assert dumped == "abc: 1\n"
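

# The StringIO default noted in the test above can be sketched as follows.
# This is a minimal illustration of the idea, assuming a plain ruamel.yaml
# dumper -- it is not GE's actual YAMLHandler implementation.
from io import StringIO

from ruamel.yaml import YAML


def _dump_with_stringio_default(data: dict, stream: Any = None) -> Optional[str]:
    yaml_ = YAML()
    if stream is None:
        # No stream supplied: dump into an in-memory buffer and return the
        # result as a string instead of writing anywhere.
        buffer = StringIO()
        yaml_.dump(data, buffer)
        return buffer.getvalue()
    yaml_.dump(data, stream)
    return None

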
from typing import Dict, List

import pandas as pd
import pytest

import great_expectations
import great_expectations.exceptions as ge_exceptions
from great_expectations import DataContext
from great_expectations.core.batch import Batch, RuntimeBatchRequest
from great_expectations.core.id_dict import BatchSpec
from great_expectations.core.yaml_handler import YAMLHandler
from great_expectations.execution_engine.sparkdf_batch_data import SparkDFBatchData
from great_expectations.validator.validator import Validator

yaml = YAMLHandler()


@pytest.fixture()
def test_df_spark(spark_session):
    test_df: "pyspark.sql.dataframe.DataFrame" = spark_session.createDataFrame(
        data=pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    )
    return test_df


@pytest.fixture
def data_context_with_datasource_spark_engine_batch_spec_passthrough(
    empty_data_context, spark_session
):
    context: DataContext = empty_data_context


from typing import Tuple

from great_expectations.execution_engine.sqlalchemy_batch_data import (
    SqlAlchemyBatchData,
)
from tests.integration.fixtures.split_and_sample_data.splitter_test_cases_and_fixtures import (
    TaxiSplittingTestCase,
    TaxiSplittingTestCases,
    TaxiTestData,
)
from tests.test_utils import (
    LoadedTable,
    clean_up_tables_with_prefix,
    get_bigquery_connection_url,
    get_snowflake_connection_url,
    load_data_into_test_database,
)

yaml_handler: YAMLHandler = YAMLHandler()


def _get_connection_string_and_dialect() -> Tuple[str, str]:
    with open("./connection_string.yml") as f:
        db_config: dict = yaml_handler.load(f)

    dialect: str = db_config["dialect"]
    if dialect == "snowflake":
        connection_string: str = get_snowflake_connection_url()
    elif dialect == "bigquery":
        connection_string: str = get_bigquery_connection_url()
    else:
        connection_string: str = db_config["connection_string"]

    return dialect, connection_string
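
# Hypothetical usage of the helper above. It assumes a connection_string.yml
# in the working directory shaped roughly like (placeholder values only):
#
#   dialect: postgresql
#   connection_string: postgresql+psycopg2://user:pass@localhost/test_db
#
# so a caller could do:
#
#   dialect, connection_string = _get_connection_string_and_dialect()

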
import os

# <snippet>
import great_expectations as ge
from great_expectations.core.batch import BatchRequest

# </snippet>
from great_expectations.core.yaml_handler import YAMLHandler

yaml = YAMLHandler()

# <snippet>
context = ge.get_context()
# </snippet>

# NOTE: The following code is only for testing and depends on an environment
# variable to set the gcp_project. You can replace the value with your own
# GCP project information
gcp_project = os.environ.get("GE_TEST_GCP_PROJECT")
if not gcp_project:
    raise ValueError(
        "Environment Variable GE_TEST_GCP_PROJECT is required to run GCS integration tests"
    )
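
# For a local run, the variable could instead be set in-process before the
# check above -- hypothetical project id, for illustration only:
#
#   os.environ["GE_TEST_GCP_PROJECT"] = "my-gcp-project"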

# parse great_expectations.yml for comparison
great_expectations_yaml_file_path = os.path.join(
    context.root_directory, "great_expectations.yml"
)
with open(great_expectations_yaml_file_path) as f:
    great_expectations_yaml = yaml.load(f)

stores = great_expectations_yaml["stores"]
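
# A hedged sketch of the kind of check this comparison sets up -- the store
# name below is a placeholder, not necessarily one this script asserts on:
#
#   assert "expectations_store" in stores
#   assert stores["expectations_store"]["class_name"] == "ExpectationsStore"

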
def test_dump_stdout_specified(capsys, yaml_handler: YAMLHandler) -> None:
    # The ruamel.yaml documentation recommends specifying sys.stdout as the
    # stream when using YAML to write output as a string.
    simplest_dict: dict = dict(abc=1)
    yaml_handler.dump(simplest_dict, stream=sys.stdout)
    captured: Any = capsys.readouterr()
    assert captured.out == "abc: 1\n"
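

# For comparison, the underlying ruamel.yaml call that YAMLHandler wraps can
# be used directly the same way -- a minimal sketch assuming a default
# ruamel.yaml dumper, not GE's actual implementation:
def _ruamel_stdout_example() -> None:
    from ruamel.yaml import YAML

    YAML().dump({"abc": 1}, sys.stdout)  # prints "abc: 1"

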
def test_load_incorrect_input(yaml_handler: YAMLHandler) -> None:
    with pytest.raises(TypeError):
        yaml_handler.load(12345)


def test_load_correct_input(
    simple_yaml: str, simple_dict: dict, yaml_handler: YAMLHandler
) -> None:
    res: dict = yaml_handler.load(simple_yaml)

    assert res == simple_dict


@pytest.fixture
def yaml_handler() -> YAMLHandler:
    return YAMLHandler()


"""
Assert statements are included to ensure that if the behaviour shown in this script breaks, it will not pass
tests and will be updated.  These statements can be ignored by users.

Comments with the tags `<snippet>` and `</snippet>` are used to ensure that, if this script is updated,
the snippets specified for use in documentation are maintained.  These comments can be ignored by users.

--documentation--
    https://docs.greatexpectations.io/docs/guides/expectations/data_assistants/how_to_create_an_expectation_suite_with_the_onboarding_data_assistant
"""
import great_expectations as ge
from great_expectations.checkpoint import SimpleCheckpoint
from great_expectations.core.batch import BatchRequest
from great_expectations.core.yaml_handler import YAMLHandler

yaml = YAMLHandler()

context: ge.DataContext = ge.get_context()

# Configure your datasource (if you aren't using one that already exists)

# <snippet>
datasource_config = {
    "name": "taxi_multi_batch_datasource",
    "class_name": "Datasource",
    "module_name": "great_expectations.datasource",
    "execution_engine": {
        "module_name": "great_expectations.execution_engine",
        "class_name": "PandasExecutionEngine",
    },
    "data_connectors": {


# <snippet>
import os

import great_expectations as ge
from great_expectations.core.batch import BatchRequest, RuntimeBatchRequest
from great_expectations.core.yaml_handler import YAMLHandler

# </snippet>

yaml = YAMLHandler()
# NOTE: The following code is only for testing and depends on an environment
# variable to set the gcp_project. You can replace the value with your own
# GCP project information
gcp_project = os.environ.get("GE_TEST_GCP_PROJECT")
if not gcp_project:
    raise ValueError(
        "Environment Variable GE_TEST_GCP_PROJECT is required to run BigQuery integration tests"
    )
bigquery_dataset = "demo"

CONNECTION_STRING = f"bigquery://{gcp_project}/{bigquery_dataset}"
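# For example, with gcp_project="my-gcp-project" this yields
# "bigquery://my-gcp-project/demo" (placeholder value, illustration only).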

# <snippet>
context = ge.get_context()
# </snippet>

# <snippet>
datasource_yaml = f"""
name: my_bigquery_datasource
class_name: Datasource
execution_engine:


import os
from typing import List

import pytest

from great_expectations.core.yaml_handler import YAMLHandler
from great_expectations.data_context import BaseDataContext
from great_expectations.data_context.types.base import DataContextConfig

from great_expectations.data_context.store import (  # isort:skip
    ExpectationsStore, ValidationsStore, EvaluationParameterStore,
)

yaml: YAMLHandler = YAMLHandler()


@pytest.fixture()
def basic_in_memory_data_context_config_just_stores():
    return DataContextConfig(
        config_version=3.0,
        plugins_directory=None,
        evaluation_parameter_store_name="evaluation_parameter_store",
        expectations_store_name="expectations_store",
        datasources={},
        stores={
            "expectations_store": {"class_name": "ExpectationsStore"},
            "evaluation_parameter_store": {"class_name": "EvaluationParameterStore"},