Exemplo n.º 1
0
 def test_empty_connection(self):
     """Instantiating the dataset with an empty ``con`` credential must
     raise ``DataSetError`` with the documented message.
     """
     empty_credentials = {"con": ""}
     expected_message = (
         r"`con` argument cannot be empty\. "
         r"Please provide a SQLAlchemy connection string\."
     )
     with pytest.raises(DataSetError, match=expected_message):
         SQLTableDataSet(table_name=TABLE_NAME, credentials=empty_credentials)
Exemplo n.º 2
0
    def test_create_connection_only_once(self, mocker):
        """Two datasets built with the same connection string (here pointing
        at different tables) must result in a single ``create_engine`` call.
        """
        create_engine_mock = mocker.patch(
            "kedro.extras.datasets.pandas.sql_dataset.create_engine"
        )

        first = SQLTableDataSet(
            table_name=TABLE_NAME, credentials={"con": CONNECTION}
        )
        assert len(first.engines) == 1

        second = SQLTableDataSet(
            table_name="other_table", credentials={"con": CONNECTION}
        )
        # Both instances must still report exactly one engine.
        for dataset in (second, first):
            assert len(dataset.engines) == 1

        create_engine_mock.assert_called_once_with(CONNECTION)
Exemplo n.º 3
0
 def test_unknown_sql(self):
     """An unsupported SQL dialect in the connection string must raise
     ``DataSetError`` when the dataset is instantiated, i.e. at catalog
     creation rather than on a later load or save.
     """
     expected_message = (
         r"The SQL dialect in your connection is not supported by SQLAlchemy"
     )
     with pytest.raises(DataSetError, match=expected_message):
         SQLTableDataSet(
             table_name=TABLE_NAME, credentials={"con": FAKE_CONN_STR}
         )
Exemplo n.º 4
0
 def test_driver_missing(self, mocker):
     """When ``create_engine`` fails because the SQL driver module is
     missing, instantiation must raise ``DataSetError`` whose message
     matches ``ERROR_PREFIX`` followed by ``mysqlclient``.
     """
     missing_driver = ImportError("No module named 'mysqldb'")
     mocker.patch(
         "kedro.extras.datasets.pandas.sql_dataset.create_engine",
         side_effect=missing_driver,
     )
     expected_message = ERROR_PREFIX + "mysqlclient"
     with pytest.raises(DataSetError, match=expected_message):
         SQLTableDataSet(
             table_name=TABLE_NAME, credentials={"con": CONNECTION}
         )
Exemplo n.º 5
0
    def test_multiple_connections(self, mocker):
        """Datasets using two distinct connection strings must create exactly
        one engine per connection string, visible from every instance.
        """
        create_engine_mock = mocker.patch(
            "kedro.extras.datasets.pandas.sql_dataset.create_engine"
        )
        other_connection = f"other_{CONNECTION}"

        first = SQLTableDataSet(
            table_name=TABLE_NAME, credentials={"con": CONNECTION}
        )
        assert len(first.engines) == 1

        second = SQLTableDataSet(
            table_name=TABLE_NAME, credentials={"con": other_connection}
        )
        # After the second connection both instances must report two engines.
        for dataset in (second, first):
            assert len(dataset.engines) == 2

        # One call per connection string, in creation order.
        assert create_engine_mock.call_args_list == [
            mocker.call(CONNECTION),
            mocker.call(other_connection),
        ]
Exemplo n.º 6
0
 def test_unknown_module(self, mocker):
     """When ``create_engine`` raises ``ImportError`` for an unrecognised
     module, the resulting ``DataSetError`` must carry the original
     error message after ``ERROR_PREFIX``.
     """
     unknown_import = ImportError("No module named 'unknown_module'")
     mocker.patch(
         "kedro.extras.datasets.pandas.sql_dataset.create_engine",
         side_effect=unknown_import,
     )
     expected_message = ERROR_PREFIX + r"No module named \'unknown\_module\'"
     with pytest.raises(DataSetError, match=expected_message):
         SQLTableDataSet(
             table_name=TABLE_NAME, credentials={"con": CONNECTION}
         )
Exemplo n.º 7
0
    def test_single_connection(self, dummy_dataframe, mocker):
        """All instances created with the same connection string must expose
        the very same engine object, both after saving and after loading.
        """
        mocker.patch("pandas.read_sql_table")
        to_sql_mock = mocker.patch.object(dummy_dataframe, "to_sql")
        dataset_kwargs = {
            "table_name": TABLE_NAME,
            "credentials": {"con": CONNECTION},
        }

        # The engine seen by the first instance is the reference object.
        reference_dataset = SQLTableDataSet(**dataset_kwargs)
        shared_engine = reference_dataset.engines[CONNECTION]
        datasets = [SQLTableDataSet(**dataset_kwargs) for _ in range(10)]

        for dataset in datasets:
            dataset.save(dummy_dataframe)
            assert dataset.engines[CONNECTION] is shared_engine

        # Every save must have gone through ``to_sql`` with the shared engine.
        save_call = mocker.call(
            name=TABLE_NAME, con=shared_engine, index=False
        )
        to_sql_mock.assert_has_calls([save_call for _ in range(10)])

        for dataset in datasets:
            dataset.load()
            assert dataset.engines[CONNECTION] is shared_engine
Exemplo n.º 8
0
 def test_empty_table_name(self):
     """Instantiating the dataset with an empty ``table_name`` must raise
     ``DataSetError`` with the documented message.
     """
     expected_message = r"`table\_name` argument cannot be empty\."
     credentials = {"con": CONNECTION}
     with pytest.raises(DataSetError, match=expected_message):
         SQLTableDataSet(table_name="", credentials=credentials)
Exemplo n.º 9
0
def table_data_set(request):
    """Build a ``SQLTableDataSet`` from the default table name and
    connection, overridden by whatever ``request.param`` supplies.
    """
    settings = {
        "table_name": TABLE_NAME,
        "credentials": {"con": CONNECTION},
    }
    # Entries from the request parameter take precedence over the defaults.
    settings.update(request.param)
    return SQLTableDataSet(**settings)
Exemplo n.º 10
0
         #Read New Matched SCMH Data
         "read_new_smch_matched": SQLQueryDataSet(
            sql= read_new_smch_matched,
            credentials=dict(con=con)
         ),

          #Read Old Matched SCMH Data
         "read_old_smch_matched_data": SQLQueryDataSet(
            sql= read_old_smch_matched_data,
            credentials=dict(con=con)
         ),
           
         #Make Use Of Save Method To Create Tables
          "create_derived_admissions": SQLTableDataSet(
            table_name='admissions',
            credentials=dict(con=con),
            save_args = dict(schema='derived',if_exists='replace')
         ),
         #Make Use Of Save Method To Create Tables
         "create_derived_discharges": SQLTableDataSet(
            table_name="discharges",
            credentials=dict(con=con),
            save_args = dict(schema="derived",if_exists="replace")
         ),
         
         #Make Use Of Save Method To Create Tables
         "create_joined_admissions_discharges": SQLTableDataSet(
            table_name='joined_admissions_discharges',
            credentials=dict(con=con),
            save_args = dict(schema='derived',if_exists='replace')
         ),