def test_sqlalchemy_data_context(test_db_connection_string):
    context = get_data_context(
        'SqlAlchemy', test_db_connection_string, echo=False)

    assert context.list_datasets() == ['table_1', 'table_2']

    dataset = context.get_dataset('table_1')
    assert isinstance(dataset, SqlAlchemyDataset)
from pprint import pprint

import great_expectations as ge


def test_custom_sql():
    options = 'sqlite:///example.db'
    sql_context = ge.get_data_context('SqlAlchemy', options)
    pprint(sql_context.list_datasets())

    query = '''
        select
            a.*
        from
            person as p
            inner join address as a on p.id = a.id
    '''
    sql_dataset = sql_context.get_dataset(dataset_name='person', custom_sql=query)

    # sql_dataset.expect_column_values_to_not_be_null('id')  # Fails here because something is wrong with dataset_name
    sql_dataset.expect_column_values_to_not_be_null('street_name')
def test_pandas_data_context(test_folder_connection_path):
    context = get_data_context('PandasCSV', test_folder_connection_path)

    assert context.list_datasets() == ['test.csv']

    dataset = context.get_dataset('test.csv')
    assert isinstance(dataset, PandasDataset)
def test_invalid_data_context():
    # Test an unknown data context name
    with pytest.raises(ValueError) as err:
        context = get_data_context('what_a_ridiculous_name', None)
    assert "Unknown data context." in str(err)
def get_ge_df(self, dataset_name, **kwargs):
    """Return a Great Expectations dataset for `dataset_name`, connected via this hook's SQLAlchemy URI."""
    self.log.info("Connecting to dataset {dataset} on {uri}".format(
        uri=self.get_uri(), dataset=dataset_name))
    sql_context = ge.get_data_context('SqlAlchemy', self.get_uri())
    return sql_context.get_dataset(dataset_name=dataset_name, **kwargs)
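# Hypothetical usage sketch for the hook method above. The hook class name,
# connection id, and column name are assumptions made for illustration; only
# get_ge_df() and the expectation API come from the snippets in this file.
hook = GreatExpectationsSqlHook(conn_id='my_database')      # assumed Airflow-style hook subclass
persons = hook.get_ge_df('person')                          # returns a SqlAlchemy-backed GE dataset
result = persons.expect_column_values_to_not_be_null('id')  # illustrative column
assert result['success']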
def get_data_context(project_id, credentials_path):
    # Build the BigQuery connection URI for the given project.
    uri = BQ_URI_FMT.format(project_id)
    context = ge.get_data_context('SqlAlchemy', uri)
    # Swap in an engine that carries the service-account credentials.
    context.engine = create_engine(uri, credentials_path=credentials_path)
    return context
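# A rough usage sketch for the BigQuery helper above, assuming it sits in a
# module that already has `import great_expectations as ge`, `from sqlalchemy
# import create_engine`, and a BQ_URI_FMT template such as 'bigquery://{}'
# (the pybigquery dialect form). The project id, key path, and table name
# below are placeholders, not values from the source.
context = get_data_context('my-gcp-project', '/path/to/service-account.json')
dataset = context.get_dataset('my_dataset.my_table')
result = dataset.expect_column_values_to_not_be_null('id')
print(result['success'])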
import great_expectations as ge
import pytest

context = ge.get_data_context('SparkCSV', './tests/test_sets')
titanic_dataset = context.get_dataset('Titanic.csv', header=True)
strf_dataset = context.get_dataset('strf_test.csv', header=True)


def test_expect_column_values_to_be_unique():
    result = titanic_dataset.expect_column_values_to_be_unique('_c0')
    assert result['success']

    result = titanic_dataset.expect_column_values_to_be_unique('Age')
    assert not result['success']

    result = titanic_dataset.expect_column_values_to_be_unique('Name')
    assert not result['success']
    assert 'Kelly, Mr James' in result['result']['partial_unexpected_list']

    result = titanic_dataset.expect_column_values_to_be_unique('Name', mostly=0.95)
    assert result['success']


def test_expect_column_values_to_match_strftime_format():
    result = strf_dataset.expect_column_values_to_match_strftime_format(
        'date', '%Y-%m-%d')
    assert result['success']

    result = strf_dataset.expect_column_values_to_match_strftime_format(
        'date', '%Y%m%d')