def _test_exists(self, df, index):
    """Appending to an existing table should leave the rows stored twice."""
    assume_not_all_delims_and_quotechars(df)
    common = dict(
        df=df,
        table_name=self.table_name,
        creds=self.sql_creds,
        index=index,
        sql_type=self.sql_type,
    )
    # seed the table, then append the same frame a second time
    to_sql(if_exists="replace", **common)
    to_sql(if_exists="append", **common)
    stored = pd.read_sql_query(
        sql=f"SELECT * FROM dbo.{self.table_name}", con=self.pyodbc_creds
    )
    prepped = prep_df_for_comparison(df=df, index=index)
    doubled = pd.concat([prepped, prepped], axis=0, ignore_index=True)
    assert_frame_equal(doubled, stored, check_column_type="equiv")
def test_tosql_all_quotechars(sql_creds):
    """A frame whose values exhaust every quotechar option cannot be serialized
    for BCP, so to_sql must raise BCPandasValueError before touching the DB.
    """
    df = pd.DataFrame(
        {i: [v, "random", "string", 1, 2.0] for i, v in enumerate(_QUOTECHAR_OPTIONS)}
    )
    with pytest.raises(BCPandasValueError):
        # table name fixed: this is the quotechars test, not the delims test
        # (safe to change — to_sql raises before the table is ever created)
        to_sql(df=df, table_name="tbl_all_quotechars", creds=sql_creds, if_exists="replace")
def _test_df_template(self, df, sql_creds, index):
    """An unrecognized ``if_exists`` value must be rejected up front."""
    assume_not_all_delims_and_quotechars(df)
    call_kwargs = {
        "df": df,
        "table_name": self.table_name,
        "creds": self.sql_creds,
        "index": index,
        "sql_type": self.sql_type,
        "if_exists": "bad_arg",  # deliberately invalid
    }
    with pytest.raises(AssertionError):
        to_sql(**call_kwargs)
def _create_tbl_with_data(self):
    """Drop any leftover table, then load ``self.df`` into a fresh one."""
    drop_stmt = f"DROP TABLE IF EXISTS {self.schema_name}.{self.table_name}"
    execute_sql_statement(self.sql_creds.engine, drop_stmt)
    to_sql(
        df=self.df,
        creds=self.sql_creds,
        table_name=self.table_name,
        sql_type=self.sql_type,
        index=False,
        if_exists="replace",
    )
def _test_base(self, df, index):
    """Round-trip check: write ``df`` with 'replace', read it back, compare."""
    assume_not_all_delims_and_quotechars(df)
    to_sql(
        df=df,
        creds=self.sql_creds,
        table_name=self.table_name,
        sql_type=self.sql_type,
        index=index,
        if_exists="replace",
    )
    query = f"SELECT * FROM {self.table_name}"
    roundtripped = pd.read_sql_query(sql=query, con=self.pyodbc_creds)
    reference = prep_df_for_comparison(df=df, index=index)
    assert_frame_equal(reference, roundtripped, check_column_type="equiv")
def run_benchmark_readsql(df: pd.DataFrame, creds: SqlCreds) -> Dict[str, float]:
    """Time pandas.read_sql_table against bcpandas.read_sql on the same table.

    Returns a mapping of benchmark title -> elapsed time as reported by
    ``_run_single_func``.
    """
    # +1 column in case index=True when the table was written
    chunk_size = 2000 // (len(df.columns) + 1)
    tbl_name = "sql_tbl_read_sql"
    # populate the table the readers will pull from
    to_sql(
        df,
        table_name=tbl_name,
        creds=creds,
        sql_type="table",
        schema="dbo",
        index=False,
        if_exists="replace",
        batch_size=10_000,
    )
    # shared kwargs for the two bcpandas variants (differ only in check_delim)
    bcp_common = dict(
        func=read_sql,
        table_name=tbl_name,
        creds=creds,
        sql_type="table",
        schema="dbo",
        batch_size=chunk_size,
    )
    cases = [
        dict(
            title=f"pandas_readsql_{chunk_size}",
            func=pd.read_sql_table,
            schema="dbo",
            table_name=tbl_name,
            con=creds.engine,
            chunksize=chunk_size,
        ),
        dict(
            title=f"bcpandas_batchsize_{chunk_size}_check_delim",
            check_delim=True,
            **bcp_common,
        ),
        dict(
            title=f"bcpandas_batchsize_{chunk_size}_no_check_delim",
            check_delim=False,
            **bcp_common,
        ),
    ]
    timings: Dict[str, float] = {}
    for case in cases:
        timings[case["title"]] = _run_single_func(**case)
    return timings
def _test_not_exists(self, df, index):
    """'fail' mode must succeed when the target table is absent."""
    # make sure the table really does not exist before writing
    execute_sql_statement(
        self.pyodbc_creds.engine, f"DROP TABLE IF EXISTS dbo.{self.table_name}"
    )
    assume_not_all_delims_and_quotechars(df)
    to_sql(
        df=df,
        creds=self.sql_creds,
        table_name=self.table_name,
        sql_type=self.sql_type,
        index=index,
        if_exists="fail",
    )
    written = pd.read_sql_query(
        sql=f"SELECT * FROM {self.table_name}", con=self.pyodbc_creds
    )
    reference = prep_df_for_comparison(df=df, index=index)
    assert_frame_equal(reference, written, check_column_type="equiv")
def test_extra_col(self):
    """Appending a frame with a column the table lacks must raise."""
    self._create_tbl_with_data()
    # add an extra column that does not exist in the target table
    with_extra = self.df.copy()
    with_extra.loc[:, "col_N"] = [17.5, 18.5, 19.5, 20.5]
    with pytest.raises(BCPandasValueError):
        to_sql(
            df=with_extra,
            creds=self.sql_creds,
            table_name=self.table_name,
            sql_type=self.sql_type,
            index=False,
            if_exists="append",
        )
def test_cols_skip_and_out_of_order(self):
    """Append a frame that both skips some table columns and reorders the rest."""
    self._create_tbl_with_data()
    subset = self.df.loc[:, ["col4", "col1", "col2"]]
    to_sql(
        df=subset,
        creds=self.sql_creds,
        table_name=self.table_name,
        sql_type=self.sql_type,
        index=False,
        if_exists="append",
    )
    # verify the appended rows landed in the right columns
    self._check_result(subset)
def test_tosql_empty_df(df, sql_creds):
    """Writing an empty frame should be a no-op: no table is created."""
    tbl_name = "tbl_df_empty"
    schema_name = "dbo"
    execute_sql_statement(sql_creds.engine, f"DROP TABLE IF EXISTS {schema_name}.{tbl_name}")
    to_sql(df=df, table_name=tbl_name, creds=sql_creds, schema=schema_name, if_exists="replace")
    # make sure nothing happened in the database; f-string replaces the
    # dated str.format call, consistent with the rest of this file
    qry = f"""
    SELECT *
    FROM INFORMATION_SCHEMA.TABLES
    WHERE TABLE_SCHEMA = '{schema_name}' AND TABLE_NAME = '{tbl_name}'
    """
    res = pd.read_sql_query(sql=qry, con=sql_creds.engine)
    # zero rows back from INFORMATION_SCHEMA means the table was never created
    # (the result set has columns even with no rows — it is a system view)
    assert res.shape[0] == 0
def test_tosql_nan_null_inf(df, sql_creds):
    """NaN/None/inf values should round-trip through to_sql unchanged."""
    tbl_name = "tbl_df_nan_null_last_col"
    schema_name = "dbo"
    execute_sql_statement(sql_creds.engine, f"DROP TABLE IF EXISTS {schema_name}.{tbl_name}")
    to_sql(
        df=df,
        creds=sql_creds,
        table_name=tbl_name,
        schema=schema_name,
        index=False,
        if_exists="replace",
    )
    # read the rows back and compare against the prepared original
    fetched = pd.read_sql_query(
        sql=f"SELECT * FROM {schema_name}.{tbl_name}", con=sql_creds.engine
    )
    reference = prep_df_for_comparison(df=df, index=False)
    assert_frame_equal(reference, fetched)
def test_tosql_full_bcp_path(bcp_path, sql_creds):
    """to_sql must work when given an explicit path to the bcp executable."""
    df = pd.DataFrame({"col1": ["a", "b", "c", "d"], "col2": [1.5, 2.5, 3.5, 4.5]})
    tbl_name = "tbl_df_full_bcp_path"
    schema_name = "dbo"
    execute_sql_statement(sql_creds.engine, f"DROP TABLE IF EXISTS {schema_name}.{tbl_name}")
    to_sql(
        df=df,
        creds=sql_creds,
        table_name=tbl_name,
        schema=schema_name,
        index=False,
        if_exists="replace",
        bcp_path=bcp_path,  # explicit executable path under test
    )
    # verify the rows landed intact
    fetched = pd.read_sql_query(
        sql=f"SELECT * FROM {schema_name}.{tbl_name}", con=sql_creds.engine
    )
    reference = prep_df_for_comparison(df=df, index=False)
    assert_frame_equal(reference, fetched)
def test_cols_out_of_order(self):
    """Append a frame whose columns are in a different order than the table's.

    see https://docs.microsoft.com/en-us/sql/relational-databases/import-export/use-a-format-file-to-map-table-columns-to-data-file-fields-sql-server
    """
    self._create_tbl_with_data()
    # reverse the column order and append
    reversed_cols = self.df.iloc[:, ::-1]
    to_sql(
        df=reversed_cols,
        creds=self.sql_creds,
        table_name=self.table_name,
        sql_type=self.sql_type,
        index=False,
        if_exists="append",
    )
    self._check_result(reversed_cols)
def test_skip_cols(self):
    """Append a frame that omits one of the table's columns.

    see https://docs.microsoft.com/en-us/sql/relational-databases/import-export/use-a-format-file-to-skip-a-table-column-sql-server
    """
    self._create_tbl_with_data()
    # drop the first column, keep the rest
    partial = self.df.iloc[:, 1:]
    to_sql(
        df=partial,
        creds=self.sql_creds,
        table_name=self.table_name,
        sql_type=self.sql_type,
        index=False,
        if_exists="append",
    )
    self._check_result(partial)
def test_duplicate_columns(sql_creds):
    """Duplicate column names in the frame must be rejected."""
    df = pd.DataFrame(
        {
            "col1": [1.5, 2.5, 3.5, 4.5],
            "col2": [5.5, 6.5, 7.5, 8.5],
            "col3": [9.5, 10.5, 11.5, 12.5],
            "col4": [13.5, 14.5, 15.5, 16.5],
        }
    )
    # renaming col3 to col2 leaves the frame with two 'col2' columns
    df = df.rename(columns={"col3": "col2"})
    with pytest.raises(BCPandasValueError):
        to_sql(
            df=df,
            table_name="tosql_column_scenarios_1",
            creds=sql_creds,
            sql_type="table",
            index=False,
            if_exists="replace",
        )
def test_batch_size_param(sql_creds, batch_size):
    """
    Test that batch size can't be:
    - 0
    - larger than the number of rows
    """
    df = pd.DataFrame(
        {
            "col1": [1.5, 2.5, 3.5, 4.5],
            "col2": [5.5, 6.5, 7.5, 8.5],
            "col3": [9.5, 10.5, 11.5, 12.5],
            "col4": [13.5, 14.5, 15.5, 16.5],
        }
    )
    # the parametrized batch_size is invalid for a 4-row frame
    with pytest.raises(BCPandasValueError):
        to_sql(
            df=df,
            table_name="some_table",
            creds=sql_creds,
            sql_type="table",
            index=False,
            if_exists="replace",
            batch_size=batch_size,
        )
def _test_exists(self, df, index):
    """'fail' mode must raise when the target table already exists."""
    assume_not_all_delims_and_quotechars(df)
    shared = dict(
        df=df,
        table_name=self.table_name,
        creds=self.sql_creds,
        index=index,
        sql_type=self.sql_type,
    )
    # create the table first so the second call finds it present
    to_sql(if_exists="replace", **shared)
    with pytest.raises(BCPandasValueError):
        to_sql(if_exists="fail", **shared)
def test_with_dtype(self):
    """dtype overrides passed to to_sql must be applied to the created columns."""
    to_sql(
        df=self.df,
        table_name=self.table_name,
        creds=self.sql_creds,
        index=False,
        sql_type=self.sql_type,
        if_exists="replace",
        dtype={
            "col1": sqlalchemy.types.INTEGER(),
            "col2": sqlalchemy.types.FLOAT(),
            "col3": sqlalchemy.types.NVARCHAR(length=10),
            "col4": sqlalchemy.types.DATE(),
            "col5": sqlalchemy.dialects.mssql.BIT(),
        },
    )
    # read the declared column types back out of INFORMATION_SCHEMA
    actual = pd.read_sql_query(
        sql=f"""
        SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_NAME = '{self.table_name}'""",
        con=self.pyodbc_creds,
    )
    expected = pd.DataFrame(
        {
            "COLUMN_NAME": ["col1", "col2", "col3", "col4", "col5"],
            "DATA_TYPE": ["int", "float", "nvarchar", "date", "bit"],
            # np.nan (lowercase): the np.NaN alias was removed in NumPy 2.0
            "CHARACTER_MAXIMUM_LENGTH": [np.nan, np.nan, 10.0, np.nan, np.nan],
        }
    )
    assert_frame_equal(expected, actual)
# fixed: the bcpandas credentials class is SqlCreds, not sqlCred
from bcpandas import SqlCreds, read_sql, to_sql

if __name__ == "__main__":
    # SqlCreds requires the server name as its first argument
    creds = SqlCreds(
        server="localhost",  # TODO: set the actual SQL Server host
        database="Northwind_Db",
        username="******",
        password="******",
    )
    # read_sql takes the table name first, then the creds
    dt = read_sql("Customers", creds)
    # to_sql's signature is (df, table_name, creds, ...) — the original call
    # passed creds as the DataFrame and dt as the table name, and would have
    # raised TypeError for giving table_name twice
    to_sql(dt, "testing", creds)
    print(dt.iloc[2])