def test_get_batch_with_no_s3_configured_via_fixture_spec(
    batch_with_split_on_whole_table_s3,
):
    """An engine whose S3 client is missing raises ExecutionEngineError.

    Renamed from ``test_get_batch_with_no_s3_configured``: a later function in
    this module reuses that exact name (flake8 F811), so this earlier test was
    silently shadowed and never collected by pytest. The unique name restores
    it to the test run without affecting anything else.

    Args:
        batch_with_split_on_whole_table_s3: fixture-provided S3 batch spec.
    """
    # if S3 was not configured
    execution_engine_no_s3 = PandasExecutionEngine()
    # Simulate the unconfigured state by removing the boto3 client handle.
    execution_engine_no_s3._s3 = None
    with pytest.raises(ge_exceptions.ExecutionEngineError):
        execution_engine_no_s3.get_batch_data(
            batch_spec=batch_with_split_on_whole_table_s3
        )
def test_get_batch_with_no_azure_configured(azure_batch_spec):
    """Fetching Azure batch data without a BlobServiceClient must fail.

    Args:
        azure_batch_spec: fixture-provided Azure batch spec.
    """
    engine = PandasExecutionEngine()
    # Simulate an engine on which the Azure BlobServiceClient was never set up.
    engine._azure = None
    # The absent connection object surfaces as an ExecutionEngineError.
    with pytest.raises(ge_exceptions.ExecutionEngineError):
        engine.get_batch_data(batch_spec=azure_batch_spec)
def test_get_batch_with_no_s3_configured():
    """get_batch_data against an S3 path fails when no S3 client exists."""
    spec = S3BatchSpec(
        path="s3a://i_dont_exist",
        reader_method="read_csv",
        splitter_method="_split_on_whole_table",
    )
    # A freshly constructed engine has no S3 client configured.
    engine = PandasExecutionEngine()
    with pytest.raises(ge_exceptions.ExecutionEngineError):
        engine.get_batch_data(batch_spec=spec)
def test_get_batch_with_split_on_whole_table_s3():
    """Round-trip a CSV through S3 and load it via PandasExecutionEngine.

    Uploads a small 2x2 DataFrame to several keys of a test bucket, reads one
    key back through an S3BatchSpec with the whole-table splitter, and then
    verifies that an engine whose S3 client has been removed raises
    ExecutionEngineError for the same spec.

    NOTE(review): this creates buckets/objects with plain boto3 calls; it
    presumably runs under a moto ``@mock_s3`` decorator or fixture applied
    elsewhere in the module — confirm, otherwise it would hit real AWS.
    """
    region_name: str = "us-east-1"
    bucket: str = "test_bucket"
    conn = boto3.resource("s3", region_name=region_name)
    conn.create_bucket(Bucket=bucket)
    client = boto3.client("s3", region_name=region_name)
    test_df: pd.DataFrame = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    # Several keys so the bucket resembles a partitioned data layout; only
    # "path/A-100.csv" is read back below.
    keys: List[str] = [
        "path/A-100.csv",
        "path/A-101.csv",
        "directory/B-1.csv",
        "directory/B-2.csv",
    ]
    for key in keys:
        client.put_object(
            Bucket=bucket, Body=test_df.to_csv(index=False).encode("utf-8"), Key=key
        )
    path = "path/A-100.csv"
    # NOTE(review): os.path.join would yield backslashes on Windows; assumes a
    # POSIX test runner — confirm if Windows CI is a target.
    full_path = f"s3a://{os.path.join(bucket, path)}"
    # `test_df` is rebound here from the source DataFrame to the batch-data
    # wrapper returned by the engine (hence `.dataframe` in the assert below).
    test_df = PandasExecutionEngine().get_batch_data(
        batch_spec=S3BatchSpec(
            path=full_path,
            reader_method="read_csv",
            splitter_method="_split_on_whole_table",
        )
    )
    # Whole-table split: the entire 2x2 CSV comes back unchanged.
    assert test_df.dataframe.shape == (2, 2)
    # if S3 was not configured
    execution_engine_no_s3 = PandasExecutionEngine()
    execution_engine_no_s3._s3 = None
    with pytest.raises(ge_exceptions.ExecutionEngineError):
        execution_engine_no_s3.get_batch_data(
            batch_spec=S3BatchSpec(
                path=full_path,
                reader_method="read_csv",
                splitter_method="_split_on_whole_table",
            )
        )
def test_get_batch_with_gcs_misconfigured(gcs_batch_spec):
    """A GCS batch spec the engine cannot access raises ExecutionEngineError.

    Args:
        gcs_batch_spec: fixture-provided spec pointing at data this
            ExecutionEngine has no access to.
    """
    engine = PandasExecutionEngine()
    # Raises because the batch_spec triggers an ExecutionEngine failure.
    with pytest.raises(ge_exceptions.ExecutionEngineError):
        engine.get_batch_data(batch_spec=gcs_batch_spec)