def test_add_new_column(engine, schema):
    """Upserting a df that gained a column with ``add_new_columns=True``
    must add the column in the database as well."""
    table_name = TableNames.ADD_NEW_COLUMN
    # MySQL needs a fixed text length for the PK column
    dtype = {'id': VARCHAR(5)} if 'mysql' in engine.dialect.dialect_description else None
    df = pd.DataFrame({'id': ['foo']}).set_index('id')
    # kwargs shared by every upsert below
    shared_kwargs = dict(con=engine, schema=schema, table_name=table_name,
                         if_row_exists='update', dtype=dtype)
    # step 1: create the table
    upsert_or_aupsert(df=df, **shared_kwargs)
    # step 2: add a column to the df and upsert again
    df['new_column'] = 'bar'
    upsert_or_aupsert(df=df, add_new_columns=True, **shared_kwargs)
    # the table content must now match the df
    df_db = select_table(engine=engine, schema=schema, table_name=table_name,
                         index_col='id').sort_index()
    pd.testing.assert_frame_equal(df, df_db)
def test_upsert_with_unique_keys(engine, schema):
    """Upsert twice into a table with unique keys and check the resulting rows
    (including the auto-incremented ``row_id`` primary key)."""
    table_name = TableNames.UNIQUE_KEY
    common_kwargs_upsert = dict(con=engine, schema=schema, table_name=table_name,
                                if_row_exists='update')

    def read_from_db():
        return select_table(engine=engine, schema=schema, table_name=table_name,
                            index_col='row_id')

    # create the table
    create_test_table(engine=engine, schema=schema)
    # insert the initial data (df_old)
    upsert_or_aupsert(df=df_old, **common_kwargs_upsert)
    df = read_from_db()
    df_expected = df_old.assign(row_id=range(1, 4)).reset_index().set_index('row_id')
    pd.testing.assert_frame_equal(df, df_expected)
    # insert the new data (df_new)
    upsert_or_aupsert(df=df_new, **common_kwargs_upsert)
    df = read_from_db()
    # special case for postgres: the id of the last row is 7 instead of 4.
    # I suppose that PG's ON CONFLICT UPDATE clause runs in such a way that it
    # counts 4 (number we would expect) + 3 (three previous rows that were updated)
    last_row_id = 7 if 'postgres' in engine.dialect.dialect_description else 4
    df_expected = (pd.DataFrame([[1, 'A0001', 'PD100', 10],
                                 [2, 'A0002', 'PD200', 20],
                                 [3, 'A0002', 'PD201', 77],
                                 [last_row_id, 'A0003', 'PD300', 30]],
                                columns=['row_id'] + df_old.reset_index().columns.tolist())
                   .set_index('row_id'))
    pd.testing.assert_frame_equal(df, df_expected)
def test_mysql_pk_not_auto_incremented(engine, schema):
    """MySQL must keep the exact index values we upsert instead of
    auto-incrementing the primary key (MySQL only)."""
    if 'mysql' not in engine.dialect.dialect_description:
        pytest.skip('This test is only relevant for MySQL')
    table_name = TableNames.PK_MYSQL
    # first upsert creates the table automatically via pangres
    df1 = pd.DataFrame({'id': [0, 1], 'name': ['foo', 'bar']}).set_index('id')
    upsert_or_aupsert(con=engine, df=df1, table_name=table_name,
                      if_row_exists='update')
    # second upsert with non-contiguous ids
    df2 = pd.DataFrame({'id': [100, 200], 'name': ['baz', 'qux']}).set_index('id')
    upsert_or_aupsert(con=engine, df=df2, table_name=table_name,
                      if_row_exists='update')
    # read back and verify MySQL stored the ids unchanged
    df_db = select_table(engine=engine, schema=schema, table_name=table_name,
                         index_col='id')
    pd.testing.assert_frame_equal(df_db.sort_index(),
                                  pd.concat((df1, df2)).sort_index())
def test_end_to_end(engine, schema, create_table, if_row_exists, df_expected):
    """Create a table from the module-level ``df`` then upsert ``df2`` with the
    given ``if_row_exists`` mode and compare against ``df_expected``."""
    # MySQL cannot have flexible text length in indices/PK
    dtype = {'profileid': VARCHAR(10)} if 'mysql' in engine.dialect.dialect_description else None
    table_name = TableNames.END_TO_END
    # kwargs reused for both upsert calls
    common_kwargs = dict(if_row_exists=if_row_exists, dtype=dtype,
                         table_name=table_name)
    read_table = lambda: read_example_table_from_db(
        engine=engine, schema=schema, table_name=table_name).sort_index()
    # 1. create the table
    upsert_or_aupsert(con=engine, schema=schema, df=df, create_table=True,
                      **common_kwargs)
    pd.testing.assert_frame_equal(df, read_table())
    # 2. insert with update/ignore
    upsert_or_aupsert(con=engine, schema=schema, df=df2,
                      create_table=create_table, **common_kwargs)
    pd.testing.assert_frame_equal(df_expected, read_table())
def test_column_named_values(engine, schema):
    """A column literally named 'values' must not break the upsert (it could
    clash with SQL VALUES keywords)."""
    df = pd.DataFrame({'values': range(5, 9)},
                      index=pd.Index(range(1, 5), name='idx'))
    upsert_or_aupsert(con=engine, schema=schema, df=df,
                      if_row_exists='update',
                      table_name=TableNames.COLUMN_NAMED_VALUES)
def test_bad_value_if_row_exists(_):
    """An invalid ``if_row_exists`` value must raise a ValueError."""
    df = pd.DataFrame({'id': [0]}).set_index('id')
    engine = create_engine('sqlite:///')
    with pytest.raises(ValueError) as excinfo:
        upsert_or_aupsert(con=engine, df=df, table_name=TableNames.NO_TABLE,
                          if_row_exists='test')
    assert 'must be "ignore" or "update"' in str(excinfo.value)
def test_bad_column_name_postgres_raises(engine, schema):
    """PostgreSQL: a column name with parentheses must raise
    BadColumnNamesException (PostgreSQL only)."""
    if 'postgres' not in engine.dialect.dialect_description:
        pytest.skip('This test is only relevant for PostgreSQL')
    df = pd.DataFrame({'id': [0], '(test)': [0]}).set_index('id')
    with pytest.raises(BadColumnNamesException) as exc_info:
        upsert_or_aupsert(con=engine, schema=schema, df=df,
                          table_name=TableNames.BAD_COLUMN_NAMES,
                          if_row_exists='update')
    assert 'does not seem to support column names with' in str(exc_info.value)
def test_bad_text_insert(engine, schema):
    """Inserting random problematic characters (from module-level
    ``bad_char_seq``) in a column named 'text' must not fail."""
    make_bad_text = lambda: ''.join(random.choice(bad_char_seq) for i in range(10))
    df_test = (pd.DataFrame({'text': [make_bad_text() for i in range(10)]})
               .rename_axis(['profileid'], axis='index', inplace=False))
    upsert_or_aupsert(con=engine, schema=schema,
                      table_name=TableNames.BAD_TEXT,
                      df=df_test, if_row_exists='update')
def test_index_with_null(engine, schema):
    """A df whose index (the future primary key) contains NULL must raise
    IntegrityError on upsert.

    Skipped for MySQL, which only issues a warning and skips the offending
    line instead of raising.
    """
    # Skip BEFORE attempting the upsert: on MySQL the statement does not raise,
    # so the `pytest.raises` below would fail instead of the test being skipped.
    if 'mysql' in engine.dialect.dialect_description:
        pytest.skip(
            'not tested with mysql as only a warning is issued and the line is skipped. '
            "Perhaps we could capture the warning with pytest?")
    df = pd.DataFrame({'ix': [None, 0], 'test': [0, 1]}).set_index('ix')
    with pytest.raises(IntegrityError):
        upsert_or_aupsert(con=engine, schema=schema, df=df,
                          table_name=TableNames.INDEX_WITH_NULL,
                          if_row_exists='update')
def test_create_schema_none(engine, schema):
    """
    If `create_schema` is True in `pangres.upsert_or_aupsert` but the schema
    is `None` we should not raise an error even if it is a database that does
    not support schemas
    """
    df = pd.DataFrame({'id': [0]}).set_index('id')
    upsert_or_aupsert(con=engine, schema=None, df=df,
                      if_row_exists='update', create_schema=True,
                      table_name=TableNames.CREATE_SCHEMA_NONE,
                      create_table=True)
def test_cannot_insert_missing_table_no_create(engine, schema):
    """
    Check if an error is raised when trying to insert in a missing table
    and `create_table` is False.
    """
    df = pd.DataFrame({'id': [0]}).set_index('id')
    with pytest.raises((OperationalError, ProgrammingError)) as excinfo:
        upsert_or_aupsert(con=engine, schema=schema, df=df,
                          table_name=TableNames.NO_TABLE,
                          if_row_exists='update', create_table=False)
    # error message wording differs per database backend
    expected_messages = ('no such table', 'does not exist', "doesn't exist")
    assert any(s in str(excinfo.value) for s in expected_messages)
def insert_chunks(engine, schema, chunksize, nb_rows):
    """Upsert an example df of ``nb_rows`` rows using the given ``chunksize``
    and verify the database content matches."""
    df = _TestsExampleTable.create_example_df(nb_rows=nb_rows)
    # MySQL does not want flexible text length in indices/PK
    dtype = {'profileid': VARCHAR(10)} if 'mysql' in engine.dialect.dialect_description else None
    upsert_or_aupsert(schema=schema,
                      table_name=TableNames.VARIOUS_CHUNKSIZES,
                      df=df, chunksize=chunksize, con=engine,
                      if_row_exists='update', dtype=dtype)
    df_db = read_example_table_from_db(engine=engine, schema=schema,
                                       table_name=TableNames.VARIOUS_CHUNKSIZES)
    # sort index (for MySQL...)
    pd.testing.assert_frame_equal(df.sort_index(), df_db.sort_index())
def test_only_index(engine, schema, if_row_exists):
    """Upserting a df that has an index but no columns must work."""
    table_name = TableNames.INDEX_ONLY_INSERT
    # df with only an index
    df = pd.DataFrame({'ix': [1]}).set_index('ix')
    upsert_or_aupsert(con=engine, schema=schema, df=df,
                      table_name=table_name, if_row_exists=if_row_exists)
    # verify the data landed in the table
    df_db = select_table(engine=engine, schema=schema, table_name=table_name)
    assert 'ix' in df_db.columns
    assert len(df_db) > 0
    assert df_db['ix'].iloc[0] == 1
def test_create_schema_not_none(engine, schema):
    """``create_schema=True`` with an actual schema name must work on postgres
    and raise HasNoSchemaSystemException on databases without schema support."""
    is_postgres = 'postgres' in engine.dialect.dialect_description
    # overwrite schema with the one dedicated to schema-creation tests
    schema = schema_for_testing_creation
    df = pd.DataFrame({'id': [0]}).set_index('id')
    table_name = TableNames.CREATE_SCHEMA_NOT_NONE
    # drop table then schema before the test (could not get my decorator to work
    # with another schema when having an optional arg schema=None due to
    # variable scopes problems)
    if is_postgres:
        drop_table(engine=engine, schema=schema, table_name=table_name)
    if is_postgres:
        drop_schema(engine=engine, schema=schema)
    try:
        upsert_or_aupsert(con=engine, schema=schema, df=df,
                          if_row_exists='update', create_schema=True,
                          table_name=table_name, create_table=True)
        if not is_postgres:
            raise AssertionError(
                'Expected the upsert_or_aupsert to fail when not using a postgres database'
            )
    except Exception as e:
        # for postgres this should have worked
        if is_postgres:
            raise e
        else:
            assert isinstance(e, HasNoSchemaSystemException)
    finally:
        # cleanup: drop table and schema after the test
        if is_postgres:
            drop_table(engine=engine, schema=schema, table_name=table_name)
            drop_schema(engine=engine, schema=schema)
def test_bad_column_names(engine, schema, iteration):
    """Upsert a df whose column name is made of random problematic characters
    (from module-level ``bad_char_seq``). Skipped on MySQL."""
    # don't do this for MySQL which has more strict rules for column names
    if 'mysql' in engine.dialect.dialect_description:
        pytest.skip(
            'MySQL has very strict rules for column names so we do not even test it'
        )
    random_bad_col_name = ''.join(random.choice(bad_char_seq) for i in range(50))
    df_test = (pd.DataFrame({random_bad_col_name: ['test', None]})
               .rename_axis(['profileid'], axis='index', inplace=False))
    # psycopg2 can't process columns with "%" or "(" or ")" so we will need
    # `fix_psycopg2_bad_cols`
    if 'postgres' in engine.dialect.dialect_description:
        df_test = fix_psycopg2_bad_cols(df_test)
    upsert_or_aupsert(con=engine, schema=schema, df=df_test,
                      table_name=TableNames.BAD_COLUMN_NAMES,
                      if_row_exists='update')
def test_create_and_insert_table_multiindex(engine, schema):
    """Create then extend a table from dfs with a MultiIndex (module-level
    ``df_multiindex`` / ``df_multiindex2``) and verify the content."""
    table_name = TableNames.MULTIINDEX
    namespace = get_table_namespace(schema=schema, table_name=table_name)

    def read_from_db():
        df_db = select_table(engine=engine, schema=schema, table_name=table_name)
        # 'ix3' comes back as text from some backends -> restore datetimes
        df_db['ix3'] = pd.to_datetime(df_db['ix3'])
        return df_db.set_index(index_col)

    # MySQL cannot have flexible text length for indexed columns
    dtype = {'ix2': VARCHAR(5)} if 'mysql' in engine.dialect.dialect_description else None
    # create
    upsert_or_aupsert(con=engine, schema=schema, df=df_multiindex,
                      table_name=table_name, dtype=dtype,
                      if_row_exists='update')
    df_db = read_from_db()
    pd.testing.assert_frame_equal(df_db, df_multiindex)
    # insert
    upsert_or_aupsert(con=engine, schema=schema, df=df_multiindex2,
                      table_name=table_name, dtype=dtype,
                      if_row_exists='update')
    df_db = read_from_db()
    pd.testing.assert_frame_equal(
        df_db, pd.concat(objs=[df_multiindex, df_multiindex2]))
def test_adapt_column_type(engine, schema):
    """An empty (all-NULL) column must have its type adapted when later upserts
    bring non-string data and ``adapt_dtype_of_empty_db_columns=True``."""
    # skip for sqlite as it does not support such alteration
    if 'sqlite' in engine.dialect.dialect_description:
        pytest.skip('such column alteration is not possible with SQlite')
    table_name = TableNames.CHANGE_EMPTY_COL_TYPE
    # MySQL needs a fixed text length for the PK column
    dtype = {'id': VARCHAR(5)} if 'mysql' in engine.dialect.dialect_description else None
    df = pd.DataFrame({'id': ['foo'], 'empty_column': [None]}).set_index('id')
    # kwargs shared by both upsert calls
    shared_kwargs = dict(con=engine, schema=schema, df=df,
                         table_name=table_name, if_row_exists='update',
                         dtype=dtype)
    # 1. create the table (empty_column contains only NULL)
    upsert_or_aupsert(**shared_kwargs)
    # 2. put non-string data in the empty column and upsert again
    df['empty_column'] = 1
    upsert_or_aupsert(**shared_kwargs, adapt_dtype_of_empty_db_columns=True)
def create_or_upsert_with_pangres(engine, schema, if_row_exists, df, chunksize,
                                  **kwargs):
    """Helper: upsert ``df`` into the benchmark table, forwarding ``kwargs``
    to ``upsert_or_aupsert``."""
    # MySQL does not want flexible text length in indices/PK
    dtype = {'profileid': VARCHAR(10)} if 'mysql' in engine.dialect.dialect_description else None
    upsert_or_aupsert(con=engine, df=df, schema=schema, chunksize=chunksize,
                      table_name=TableNames.BENCHMARK,
                      if_row_exists=if_row_exists, dtype=dtype, **kwargs)