def create_view(view_name, dest_conn=None):
    """
    Create a JSON-flattening view for view_name unless it already exists.

    Samples one row of the copied table to discover the JSON keys, then
    builds a view exposing each key as a column.

    :param view_name: table to sample from; also the name of the view created
    :param dest_conn: Destination connection
    :return: True if view created, false if not
    """
    existing = get_rows(view_count_query(view_name), dest_conn)[0].count
    if existing != 0:
        # View already present; nothing to do
        return False

    rows = get_rows(sample_rows_query(view_name), dest_conn)
    if not rows:
        # No data to sample, so the column list cannot be derived
        return False

    print(rows[0])
    sample_json = rows[0].fields_json['f1']
    # One flattened column per top-level JSON key
    column_exprs = ["fields #>> '{f1,%s}' as %s" % (key, key)
                    for key in sample_json]
    fields = ", ".join(column_exprs)
    print('Creating view %s' % view_name)
    execute(view_create_query(D_TABLE, fields, view_name), dest_conn)
    return True
def test_copy_table_rows_on_error(test_tables, testdb_conn, test_table_data):
    """Failing rows are reported via on_error; remaining rows are copied."""
    # Arrange: seed dest with a row whose id collides with the first src row
    duplicate_id_row_sql = """
        INSERT INTO dest (id)
        VALUES (
          1
          )""".strip()
    execute(duplicate_id_row_sql, testdb_conn)

    # Act: collect failing rows instead of raising
    errors = []
    copy_table_rows('src', testdb_conn, testdb_conn, target='dest',
                    on_error=errors.extend)

    # Assert
    result = get_rows("SELECT * FROM dest", testdb_conn)

    # The conflicting first row should have been caught as an error
    failed_row, exc = errors[0]
    assert failed_row.id == 1
    assert "unique" in str(exc).lower()

    # The remaining rows should have been inserted untouched
    assert result[1:] == test_table_data[1:]
def test_execute_happy_path(pgtestdb_test_tables, pgtestdb_conn):
    """execute() runs a plain statement against the connection."""
    # Act
    execute("DELETE FROM src;", pgtestdb_conn)

    # Assert: the table is now empty
    assert get_rows('SELECT * FROM src;', pgtestdb_conn) == []
def test_execute_with_params(pgtestdb_test_tables, pgtestdb_conn, test_table_data):
    """execute() forwards bind parameters to the driver."""
    # Arrange
    delete_sql = "DELETE FROM src WHERE id = %s;"
    expected = test_table_data[1:]

    # Act: delete only the row matching the bound id
    execute(delete_sql, pgtestdb_conn, parameters=[1])

    # Assert: exactly that one row is gone
    result = get_rows('SELECT * FROM src;', pgtestdb_conn)
    assert result == expected
def refresh_mat_views(dest_conn):
    """
    Refresh materialized views, but only if a refresh_matviews() routine
    is defined in the destination database.

    :param dest_conn: Destination connection
    :return: True if materialized views refreshed, false if not.
    """
    count_query = "SELECT count(*)FROM information_schema.routines where routine_name = 'refresh_matviews';"
    routine_count = get_rows(count_query, dest_conn)[0].count
    if routine_count <= 0:
        # Routine not installed in this database
        return False

    print('Refreshing materialized views')
    execute("select * from refresh_matviews();", dest_conn)
    print('Done')
    return True
def test_logging_execute(caplog, level, expected, pgtestdb_conn):
    """Log output of execute() matches the expected messages per log level."""
    # Arrange
    caplog.set_level(level, logger=logger.name)

    # Act
    execute("SELECT 1 AS result;", pgtestdb_conn)

    # Connection object ids and hostnames vary between tests and test
    # environments, so mask them before comparing
    scrubbed = [re.sub(r'object at .*;', 'object at ???;', m)
                for m in caplog.messages]
    scrubbed = [re.sub(r'host=.*? ', 'host=??? ', m) for m in scrubbed]

    # Assert
    for i, message in enumerate(scrubbed):
        assert message == expected[i]
def mysql_copy_src_to_dest(src_conn, s_tables, dest_conn):
    """
    Copy rows from the configured MySQL source tables into the destination
    JSON table (D_TABLE), then create a flattened view per table.

    :param src_conn: Source (MySQL) connection
    :param s_tables: list to receive table names when S_TABLES == '__all__'
                     (mutated in place); ignored otherwise
    :param dest_conn: Destination connection
    """
    if S_TABLES == '__all__':
        # Discover every source table
        for row in get_rows_func(get_tables_query(), src_conn):
            s_tables.append(row[0])
    else:
        s_tables = S_TABLES.split(",")

    # D_TABLE is a trusted config identifier; the row VALUES are data and are
    # passed as bind parameters so quotes/special characters in the source
    # data cannot break the statement or inject SQL (the original code
    # interpolated them into the string with str.format).
    insert_sql = "INSERT INTO {0} (table_name, fields) VALUES (%s, %s)".format(D_TABLE)

    for S_DB_TABLE in s_tables:
        select_sql = table_select_query(S_DB_TABLE, src_conn, offset=0, limit=1000)
        print('Copying data from %s' % S_DB_TABLE)
        cursor = src_conn.cursor()
        try:
            cursor.execute(select_sql)
            rows = cursor.fetchall()
        finally:
            # Always release the source cursor, even if the select fails
            cursor.close()
        for row in rows:
            execute(insert_sql, dest_conn, parameters=[row[0], row[1]])
        create_view(S_DB_TABLE, dest_conn)
def copy_src_to_dest(): delete_sql = table_delete_query(D_TABLE) # USE THIS TO CLEAR DESTINATION FOR IDEMPOTENCE src_conn = get_source_connection() print('Connected to source') dest_conn = get_destination_connection() print('Connected to destination') s_tables = [] if DCLEAR: execute(delete_sql, dest_conn) print('Cleared destination') if engine == ENGINES['mysql']: mysql_copy_src_to_dest(src_conn, s_tables, dest_conn) else: if S_TABLES == '__all__': tables_query = get_tables_query() rows = get_rows(tables_query, src_conn) for row in rows: s_tables.append(row.tablename) else: s_tables = S_TABLES.split(",") for S_DB_TABLE in s_tables: record_count = get_rows("select count(*) from {0}".format(S_DB_TABLE), src_conn)[0].count offset = 0 limit = 50000 print('Copying {0} records from {1}'.format(record_count, S_DB_TABLE)) while record_count > 0: select_sql = table_select_query(S_DB_TABLE, src_conn, offset=offset, limit=limit) print('Table: {} | Records remaining: {} | Limit: {} | Offset: {} '.format(S_DB_TABLE, record_count,limit, offset)) insert_sql = table_insert_query(D_TABLE) copy_rows(select_sql, src_conn, insert_sql, dest_conn) record_count -= limit offset += limit create_view(S_DB_TABLE, dest_conn) refresh_mat_views(dest_conn)
def test_copy_table_rows_on_error(test_tables, testdb_conn, test_table_data):
    """Oracle variant: failing rows go to on_error; the rest are copied."""
    # Arrange: pre-load a dest row whose id collides with the first src row
    duplicate_id_row_sql = """
        INSERT INTO dest (id, day, date_time)
        VALUES (
          1,
          TO_DATE('2003/05/03 21:02:44', 'yyyy/mm/dd hh24:mi:ss'),
          TO_DATE('2003/05/03 21:02:44', 'yyyy/mm/dd hh24:mi:ss')
          )""".strip()
    execute(duplicate_id_row_sql, testdb_conn)

    # Act: collect failing rows rather than aborting
    errors = []
    copy_table_rows('src', testdb_conn, testdb_conn, target='dest',
                    on_error=errors.extend)

    # Assert
    result = get_rows("SELECT * FROM dest", testdb_conn)

    # Normalise Oracle date/datetime values back to native classes
    fixed_dates = [(*row[:4], row.DAY.date(), row.DATE_TIME) for row in result]

    # The clashing first row is reported via on_error, noting that Oracle
    # changes the case of column names
    failed_row, exc = errors[0]
    assert failed_row.ID == 1
    assert "unique" in str(exc).lower()

    # All remaining rows were inserted correctly
    assert fixed_dates[1:] == test_table_data[1:]
def test_execute_bad_query(pgtestdb_test_tables, pgtestdb_conn):
    """Malformed SQL is surfaced as ETLHelperQueryError."""
    bad_sql = "DELETE * FROM this_does_not_exist"
    with pytest.raises(ETLHelperQueryError):
        execute(bad_sql, pgtestdb_conn)
def test_bad_constraint(test_tables, testdb_conn):
    """A constraint violation is surfaced as ETLHelperQueryError."""
    # src already has a row with id=1, so this insert must fail
    with pytest.raises(ETLHelperQueryError):
        execute("INSERT INTO src (id) VALUES (1)", testdb_conn)
def test_bad_insert(testdb_conn):
    """Inserting into a missing table raises ETLHelperQueryError."""
    with pytest.raises(ETLHelperQueryError):
        execute("INSERT INTO bad_table (id) VALUES (1)", testdb_conn)
def test_bad_select(testdb_conn):
    """Selecting from a missing table raises ETLHelperQueryError."""
    with pytest.raises(ETLHelperQueryError):
        execute("SELECT * FROM bad_table", testdb_conn)