def test_empty_connection(self):
    """An empty ``con`` credential must be rejected when the data set is built."""
    expected_message = (
        r"`con` argument cannot be empty\. "
        r"Please provide a SQLAlchemy connection string\."
    )
    with pytest.raises(DataSetError, match=expected_message):
        SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": ""})
def test_create_connection_only_once(self, mocker):
    """Two data sets using the same connection string create one engine.

    The data sets point at different tables; the engine factory is still
    expected to be invoked a single time, with the shared connection string.
    """
    create_engine_mock = mocker.patch(
        "kedro.extras.datasets.pandas.sql_dataset.create_engine"
    )

    first = SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": CONNECTION})
    assert len(first.engines) == 1

    second = SQLTableDataSet(
        table_name="other_table", credentials={"con": CONNECTION}
    )
    # Both instances must still report a single registered engine.
    assert len(second.engines) == 1
    assert len(first.engines) == 1

    create_engine_mock.assert_called_once_with(CONNECTION)
def test_unknown_sql(self):
    """An unsupported SQL dialect is reported as soon as the data set is built.

    The error therefore surfaces at instantiation, not later during a load
    or save operation.
    """
    expected_message = (
        r"The SQL dialect in your connection is not supported by SQLAlchemy"
    )
    with pytest.raises(DataSetError, match=expected_message):
        SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": FAKE_CONN_STR})
def test_driver_missing(self, mocker):
    """A missing ``mysqldb`` driver surfaces as a ``DataSetError`` naming mysqlclient."""
    # Simulate the engine factory failing because the driver is not installed.
    missing_driver = ImportError("No module named 'mysqldb'")
    mocker.patch(
        "kedro.extras.datasets.pandas.sql_dataset.create_engine",
        side_effect=missing_driver,
    )

    expected_message = ERROR_PREFIX + "mysqlclient"
    with pytest.raises(DataSetError, match=expected_message):
        SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": CONNECTION})
def test_multiple_connections(self, mocker):
    """Data sets with different connection strings create one engine each.

    After the second data set is built, both instances are expected to
    report two registered engines, and the engine factory must have been
    called once per connection string, in creation order.
    """
    create_engine_mock = mocker.patch(
        "kedro.extras.datasets.pandas.sql_dataset.create_engine"
    )

    first = SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": CONNECTION})
    assert len(first.engines) == 1

    second_con = f"other_{CONNECTION}"
    second = SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": second_con})
    assert len(second.engines) == 2
    assert len(first.engines) == 2

    assert create_engine_mock.call_args_list == [
        mocker.call(CONNECTION),
        mocker.call(second_con),
    ]
def test_unknown_module(self, mocker):
    """An unrecognised missing module keeps the original import error text.

    The raised ``DataSetError`` message is expected to contain the message
    of the underlying ``ImportError``.
    """
    import_failure = ImportError("No module named 'unknown_module'")
    mocker.patch(
        "kedro.extras.datasets.pandas.sql_dataset.create_engine",
        side_effect=import_failure,
    )

    expected_message = ERROR_PREFIX + r"No module named \'unknown\_module\'"
    with pytest.raises(DataSetError, match=expected_message):
        SQLTableDataSet(table_name=TABLE_NAME, credentials={"con": CONNECTION})
def test_single_connection(self, dummy_dataframe, mocker):
    """Every instance built from the same connection string shares one engine.

    Saving and loading through ten separate data sets must always go through
    the engine object registered by the first instance.
    """
    mocker.patch("pandas.read_sql_table")
    to_sql_mock = mocker.patch.object(dummy_dataframe, "to_sql")

    def build_data_set():
        # Fresh credentials mapping per instance, as in separate constructions.
        return SQLTableDataSet(
            table_name=TABLE_NAME, credentials={"con": CONNECTION}
        )

    first = build_data_set()
    unique_connection = first.engines[CONNECTION]
    data_sets = [build_data_set() for _ in range(10)]

    # Save phase: each instance must resolve to the shared engine.
    for data_set in data_sets:
        data_set.save(dummy_dataframe)
        assert data_set.engines[CONNECTION] is unique_connection

    # All ten saves must have written through that same engine.
    single_save = mocker.call(name=TABLE_NAME, con=unique_connection, index=False)
    to_sql_mock.assert_has_calls([single_save] * 10)

    # Load phase: the engine must still be the shared one.
    for data_set in data_sets:
        data_set.load()
        assert data_set.engines[CONNECTION] is unique_connection
def test_empty_table_name(self):
    """An empty ``table_name`` must be rejected when the data set is built."""
    expected_message = r"`table\_name` argument cannot be empty\."
    with pytest.raises(DataSetError, match=expected_message):
        SQLTableDataSet(table_name="", credentials={"con": CONNECTION})
def table_data_set(request):
    """Build a ``SQLTableDataSet`` from default arguments plus overrides.

    The defaults are the module's table name and connection string; the
    entries of ``request.param`` are applied on top of them and win on
    conflict.
    """
    constructor_args = {
        "table_name": TABLE_NAME,
        "credentials": {"con": CONNECTION},
    }
    constructor_args.update(request.param)
    return SQLTableDataSet(**constructor_args)
#Read New Matched SCMH Data "read_new_smch_matched": SQLQueryDataSet( sql= read_new_smch_matched, credentials=dict(con=con) ), #Read Old Matched SCMH Data "read_old_smch_matched_data": SQLQueryDataSet( sql= read_old_smch_matched_data, credentials=dict(con=con) ), #Make Use Of Save Method To Create Tables "create_derived_admissions": SQLTableDataSet( table_name='admissions', credentials=dict(con=con), save_args = dict(schema='derived',if_exists='replace') ), #Make Use Of Save Method To Create Tables "create_derived_discharges": SQLTableDataSet( table_name="discharges", credentials=dict(con=con), save_args = dict(schema="derived",if_exists="replace") ), #Make Use Of Save Method To Create Tables "create_joined_admissions_discharges": SQLTableDataSet( table_name='joined_admissions_discharges', credentials=dict(con=con), save_args = dict(schema='derived',if_exists='replace') ),