Example #1
def test_sqlalchemy_datasource_processes_dataset_options(
        test_db_connection_string):
    datasource = SqlAlchemyDatasource(
        "SqlAlchemy", credentials={"url": test_db_connection_string})
    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": False})
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch,
                          ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False

    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": True})
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch,
                          ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is True

    batch_kwargs = {
        "query": "select * from table_1;",
        "dataset_options": {
            "caching": False
        },
    }
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch,
                          ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False
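These snippets are lifted from a test module, so their imports are implicit. A minimal sketch of the imports they appear to rely on, using module paths from the legacy (batch_kwargs-era) great_expectations API; exact paths may differ between releases and should be treated as assumptions:

# Assumed imports for the test snippets in these examples (legacy
# great_expectations API; module paths may vary by release).
import pandas as pd

from great_expectations.core import ExpectationSuite
from great_expectations.core.batch import Batch
from great_expectations.dataset import SqlAlchemyDataset
from great_expectations.datasource import SqlAlchemyDatasource
from great_expectations.validator.validator import Validator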
Example #2
def test_standalone_sqlalchemy_datasource(test_db_connection_string):
    datasource = SqlAlchemyDatasource(
        'SqlAlchemy', connection_string=test_db_connection_string, echo=False)

    assert datasource.get_available_data_asset_names() == {
        "default": {"table_1", "table_2"}
    }
    dataset1 = datasource.get_batch("table_1")
    dataset2 = datasource.get_batch("table_2", schema='main')
    assert isinstance(dataset1, SqlAlchemyDataset)
    assert isinstance(dataset2, SqlAlchemyDataset)
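The test_db_connection_string fixture itself is not shown. A plausible sketch that would satisfy the asserts above; the table names match the tests, but the fixture body and column contents are assumptions:

import pandas as pd
import pytest
import sqlalchemy as sa


@pytest.fixture
def test_db_connection_string(tmp_path):
    # Throwaway SQLite database holding the two tables the tests expect.
    url = f"sqlite:///{tmp_path / 'test.db'}"
    engine = sa.create_engine(url)
    pd.DataFrame({"col_1": [1, 2, 3, 4, 5],
                  "col_2": ["a", "b", "c", "d", "e"]}).to_sql(
        "table_1", con=engine, index=False)
    pd.DataFrame({"col_1": [0, 1, 2, 3, 4],
                  "col_2": ["b", "c", "d", "e", "f"]}).to_sql(
        "table_2", con=engine, index=False)
    return url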
Example #3
def test_sqlalchemysource_templating(sqlitedb_engine):
    datasource = SqlAlchemyDatasource(engine=sqlitedb_engine)
    generator = datasource.get_generator()
    generator.add_query("test", "select 'cat' as ${col_name};")
    df = datasource.get_batch("test", col_name="animal_name")
    res = df.expect_column_to_exist("animal_name")
    assert res["success"] is True
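The sqlitedb_engine fixture used here and in several later examples is also not shown; presumably it is just an in-memory SQLite engine, e.g. (assumed sketch):

import pytest
import sqlalchemy as sa


@pytest.fixture
def sqlitedb_engine():
    # In-memory SQLite engine shared by the SQL-flavoured tests (assumption).
    return sa.create_engine("sqlite://")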
Example #4
def test_sqlalchemy_source_limit(sqlitedb_engine):
    df1 = pd.DataFrame({
        "col_1": [1, 2, 3, 4, 5],
        "col_2": ["a", "b", "c", "d", "e"]
    })
    df2 = pd.DataFrame({
        "col_1": [0, 1, 2, 3, 4],
        "col_2": ["b", "c", "d", "e", "f"]
    })
    df1.to_sql("table_1", con=sqlitedb_engine, index=True)
    df2.to_sql("table_2", con=sqlitedb_engine, index=True, schema="main")
    datasource = SqlAlchemyDatasource("SqlAlchemy", engine=sqlitedb_engine)
    limited_batch = datasource.get_batch({
        "table": "table_1",
        "limit": 1,
        "offset": 2
    })
    assert isinstance(limited_batch, Batch)
    limited_dataset = Validator(
        limited_batch,
        expectation_suite=ExpectationSuite("test"),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()
    assert limited_dataset._table.name.startswith(
        "ge_tmp_")  # we have generated a temporary table
    assert len(limited_dataset.head(10)) == 1  # and it is only one row long
    assert limited_dataset.head(
        10)["col_1"][0] == 3  # offset should have been applied
Example #5
def test_standalone_sqlalchemy_datasource(test_db_connection_string, sa):
    datasource = SqlAlchemyDatasource(
        "SqlAlchemy",
        connection_string=test_db_connection_string,
        echo=False,
        batch_kwargs_generators={
            "default": {
                "class_name": "TableBatchKwargsGenerator"
            }
        },
    )

    assert set(
        datasource.get_available_data_asset_names()["default"]["names"]) == {
            ("main.table_1", "table"),
            ("main.table_2", "table"),
        }
    batch_kwargs = datasource.build_batch_kwargs("default", "main.table_1")
    batch = datasource.get_batch(batch_kwargs=batch_kwargs)
    assert isinstance(batch, Batch)
    batch_data = batch.data
    assert isinstance(
        batch_data,
        great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyBatchReference,
    )
    dataset = SqlAlchemyDataset(**batch.data.get_init_kwargs())
    assert len(dataset.head(10)) == 5
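This variant also takes an sa fixture, which in the great_expectations test suite essentially provides the sqlalchemy module and skips the test when it is unavailable; a plausible sketch (assumed, not the project's actual fixture):

import pytest


@pytest.fixture
def sa():
    # Skip SQL-backed tests cleanly when sqlalchemy is not installed (assumption).
    return pytest.importorskip("sqlalchemy")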
Example #6
def test_sqlalchemy_source_limit(sqlitedb_engine):
    df1 = pd.DataFrame({
        'col_1': [1, 2, 3, 4, 5],
        'col_2': ['a', 'b', 'c', 'd', 'e']
    })
    df2 = pd.DataFrame({
        'col_1': [0, 1, 2, 3, 4],
        'col_2': ['b', 'c', 'd', 'e', 'f']
    })
    df1.to_sql('table_1', con=sqlitedb_engine, index=True)
    df2.to_sql('table_2', con=sqlitedb_engine, index=True, schema='main')
    datasource = SqlAlchemyDatasource('SqlAlchemy', engine=sqlitedb_engine)
    limited_batch = datasource.get_batch({
        "table": "table_1",
        "limit": 1,
        "offset": 2
    })
    assert isinstance(limited_batch, Batch)
    limited_dataset = Validator(
        limited_batch,
        expectation_suite=ExpectationSuite("test"),
        expectation_engine=SqlAlchemyDataset).get_dataset()
    assert limited_dataset._table.name.startswith(
        "ge_tmp_")  # we have generated a temporary table
    assert len(limited_dataset.head(10)) == 1  # and it is only one row long
    assert limited_dataset.head(
        10)['col_1'][0] == 3  # offset should have been applied
Example #7
def test_sqlalchemy_source_templating(sqlitedb_engine, empty_data_context):
    context: DataContext = empty_data_context
    datasource = SqlAlchemyDatasource(
        engine=sqlitedb_engine,
        batch_kwargs_generators={
            "foo": {
                "class_name": "QueryBatchKwargsGenerator"
            }
        },
    )
    generator = datasource.get_batch_kwargs_generator("foo")
    generator.add_query(data_asset_name="test",
                        query="select 'cat' as ${col_name};")
    batch = datasource.get_batch(
        generator.build_batch_kwargs(
            "test", query_parameters={"col_name": "animal_name"}))
    dataset = BridgeValidator(
        batch,
        expectation_suite=ExpectationSuite("test", data_context=context),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()
    res = dataset.expect_column_to_exist("animal_name")
    assert res.success is True
    res = dataset.expect_column_values_to_be_in_set("animal_name", ["cat"])
    assert res.success is True
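The ${col_name} placeholder in the stored query is filled from query_parameters before execution; the net effect is the same as a string.Template substitution (a plain-Python illustration, not the generator's internal code):

from string import Template

query = Template("select 'cat' as ${col_name};").safe_substitute(
    col_name="animal_name")
assert query == "select 'cat' as animal_name;"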
Example #8
def test_sqlalchemy_datasource_query_and_table_handling(sqlitedb_engine):
    # MANUALLY SET DIALECT NAME FOR TEST
    datasource = SqlAlchemyDatasource('SqlAlchemy', engine=sqlitedb_engine)
    with mock.patch("great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyBatchReference.__init__",
                    return_value=None) as mock_batch:
        datasource.get_batch({
            "query": "select * from foo;"
        })
    mock_batch.assert_called_once_with(engine=sqlitedb_engine, schema=None, query="select * from foo;", table_name=None)

    # Normally, we do not allow both query and table_name
    with mock.patch("great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyBatchReference.__init__",
                    return_value=None) as mock_batch:
        datasource.get_batch({
            "query": "select * from foo;",
            "table_name": "bar"
        })
    mock_batch.assert_called_once_with(engine=sqlitedb_engine, schema=None, query="select * from foo;", table_name=None)

    # Snowflake should allow *both* query *and* table_name
    sqlitedb_engine.dialect.name = "snowflake"
    with mock.patch("great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyBatchReference.__init__",
                    return_value=None) as mock_batch:
        datasource.get_batch({
            "query": "select * from foo;",
            "table_name": "bar"
        })
    mock_batch.assert_called_once_with(engine=sqlitedb_engine, schema=None, query="select * from foo;",
                                       table_name="bar")
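The mock.patch calls replace SqlAlchemyBatchReference.__init__ so the test can assert on the keyword arguments the datasource forwards without touching a database. The same pattern in miniature, on a stand-in class:

from unittest import mock


class BatchReference:
    """Stand-in for SqlAlchemyBatchReference in this sketch."""

    def __init__(self, engine=None, schema=None, query=None, table_name=None):
        self.query = query


with mock.patch.object(BatchReference, "__init__", return_value=None) as mocked:
    BatchReference(query="select * from foo;", table_name=None)

# The patched __init__ never ran; only the call arguments were recorded.
mocked.assert_called_once_with(query="select * from foo;", table_name=None)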
Example #9
def test_sqlalchemy_source_templating(sqlitedb_engine):
    datasource = SqlAlchemyDatasource(
        engine=sqlitedb_engine,
        generators={"foo": {
            "class_name": "QueryGenerator"
        }})
    generator = datasource.get_generator("foo")
    generator.add_query("test", "select 'cat' as ${col_name};")
    df = datasource.get_batch(
        "test", "my_suite",
        generator.yield_batch_kwargs("test", col_name="animal_name"))
    res = df.expect_column_to_exist("animal_name")
    assert res["success"] is True
Example #10
def test_sqlalchemy_source_templating(sqlitedb_engine):
    datasource = SqlAlchemyDatasource(engine=sqlitedb_engine, generators={
        "foo": {
            "class_name": "QueryBatchKwargsGenerator"
        }
    })
    generator = datasource.get_generator("foo")
    generator.add_query("test", "select 'cat' as ${col_name};")
    batch = datasource.get_batch(generator.build_batch_kwargs("test", query_parameters={'col_name': "animal_name"}))
    dataset = Validator(batch, expectation_suite=ExpectationSuite("test"), expectation_engine=SqlAlchemyDataset).get_dataset()
    res = dataset.expect_column_to_exist("animal_name")
    assert res.success is True
    res = dataset.expect_column_values_to_be_in_set('animal_name', ['cat'])
    assert res.success is True