def test_sqlalchemy_data_context(test_db_connection_string):
    """A SqlAlchemy context lists both tables and returns SqlAlchemyDataset objects."""
    ctx = get_data_context(
        'SqlAlchemy', test_db_connection_string, echo=False)

    listed = ctx.list_datasets()
    assert listed == ['table_1', 'table_2']
    assert isinstance(ctx.get_dataset('table_1'), SqlAlchemyDataset)
def test_custom_sql():
    """Build a dataset from a custom SQL join and validate a joined column."""
    connection_string = 'sqlite:///example.db'
    sql_context = ge.get_data_context('SqlAlchemy', connection_string)
    pprint(sql_context.list_datasets())

    query = '''
        select a.*
        from person as p 
            inner join address as a
            on p.id = a.id
    '''
    sql_dataset = sql_context.get_dataset(
        dataset_name='person', custom_sql=query)

    # NOTE(review): the expectation below is a known failure — something is
    # wrong with dataset_name handling — so it stays disabled:
    # sql_dataset.expect_column_values_to_not_be_null('id')
    sql_dataset.expect_column_values_to_not_be_null('street_name')
def test_pandas_data_context(test_folder_connection_path):
    """A PandasCSV context lists the CSV file and returns a PandasDataset."""
    ctx = get_data_context('PandasCSV', test_folder_connection_path)

    assert ctx.list_datasets() == ['test.csv']
    assert isinstance(ctx.get_dataset('test.csv'), PandasDataset)
def test_invalid_data_context():
    """An unknown data context name must raise ValueError with a clear message."""
    # Bug fix: the message assertion used to sit INSIDE the `with` block,
    # after the line that raises — so it never executed. It must run after
    # the context manager exits, and inspect err.value (the exception),
    # not the ExceptionInfo wrapper.
    with pytest.raises(ValueError) as err:
        get_data_context('what_a_ridiculous_name', None)
    assert "Unknown data context." in str(err.value)
# Example 5
    def get_ge_df(self, dataset_name, **kwargs):
        """Open a SqlAlchemy data context on this hook's URI and return the named dataset.

        Extra keyword arguments are forwarded to ``get_dataset`` unchanged.
        """
        self.log.info("Connecting to dataset {dataset} on {uri}".format(uri=self.get_uri(), dataset=dataset_name))
        context = ge.get_data_context('SqlAlchemy', self.get_uri())
        return context.get_dataset(dataset_name=dataset_name, **kwargs)
def get_data_context(project_id, credentials_path):
    """Return a SqlAlchemy data context pointed at BigQuery project *project_id*.

    The context's engine is swapped for one built with *credentials_path* so
    queries authenticate with the service-account credentials.
    """
    uri = BQ_URI_FMT.format(project_id)
    ctx = ge.get_data_context('SqlAlchemy', uri)
    # Replace the default engine with a credentialed one.
    ctx.engine = create_engine(uri, credentials_path=credentials_path)
    return ctx
import great_expectations as ge
import pytest

# Module-level fixtures shared by the tests below: a SparkCSV data context
# over the test-set folder, plus the two CSV-backed datasets it serves.
context = ge.get_data_context('SparkCSV', './tests/test_sets')
titanic_dataset = context.get_dataset('Titanic.csv', header=True)
strf_dataset = context.get_dataset('strf_test.csv', header=True)


def test_expect_column_values_to_be_unique():
    """Uniqueness passes on the index column, fails on Age and Name, and
    passes on Name once mostly=0.95 tolerates the duplicates."""
    assert titanic_dataset.expect_column_values_to_be_unique('_c0')['success']

    assert not titanic_dataset.expect_column_values_to_be_unique('Age')['success']

    name_check = titanic_dataset.expect_column_values_to_be_unique('Name')
    assert not name_check['success']
    assert 'Kelly, Mr James' in name_check['result']['partial_unexpected_list']

    tolerant_check = titanic_dataset.expect_column_values_to_be_unique(
        'Name', mostly=0.95)
    assert tolerant_check['success']


def test_expect_column_values_to_match_strftime_format():
    result = strf_dataset.expect_column_values_to_match_strftime_format(
        'date', '%Y-%m-%d')
    assert result['success']

    result = strf_dataset.expect_column_values_to_match_strftime_format(
        'date', '%Y%m%d')