def setup_guids_postgresql(engine: sa.engine.Engine) -> None:  # pragma: no cover
    """Set up UUID generation using the pgcrypto extension for postgres.

    This query only needs to be executed once, when the database is created.
    """
    engine.execute('CREATE EXTENSION IF NOT EXISTS "pgcrypto"')

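# A minimal sketch (not from the original source; the table and column names
# are assumptions) of how the pgcrypto extension is typically consumed once
# setup_guids_postgresql() has run: the server generates primary keys via
# gen_random_uuid().
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID

metadata = sa.MetaData()
items = sa.Table(
    "items",
    metadata,
    sa.Column(
        "id",
        UUID(as_uuid=True),
        primary_key=True,
        server_default=sa.text("gen_random_uuid()"),  # provided by pgcrypto
    ),
    sa.Column("name", sa.String, nullable=False),
)
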
def db_api(client: TestClient, postgres_db: sa.engine.Engine) -> Iterator[ProjectDBAPI]:
    db_api = _create_project_db(client)
    yield db_api
    # clean the projects
    postgres_db.execute("DELETE FROM projects")

async def test_create_cluster(
    enable_dev_features: None,
    client: TestClient,
    postgres_db: sa.engine.Engine,
    logged_user: Dict[str, Any],
    faker: Faker,
    user_role: UserRole,
    cluster_authentication: Callable[[], Dict[str, Any]],
    expected: ExpectedResponse,
):
    # check we can create a cluster
    assert client.app
    url = client.app.router["create_cluster_handler"].url_for()
    cluster_data = json.loads(
        ClusterCreate(
            endpoint=faker.uri(),
            authentication=cluster_authentication(),
            name=faker.name(),
            type=random.choice(list(ClusterType)),
        ).json(by_alias=True, exclude_unset=True)
    )
    rsp = await client.post(f"{url}", json=cluster_data)
    data, error = await assert_status(
        rsp,
        expected.forbidden
        if user_role == UserRole.USER
        else expected.created,  # only accessible for TESTER
    )
    if error:
        # we are done here
        return
    created_cluster = Cluster.parse_obj(data)
    assert created_cluster

    # check the database entry was correctly created
    result: ResultProxy = postgres_db.execute(
        sa.select([clusters]).where(clusters.c.name == cluster_data["name"])
    )
    assert result, "could not find cluster in database"
    row = result.fetchone()
    assert row, "could not find cluster in database"
    assert row[clusters.c.name] == cluster_data["name"]
    assert row[clusters.c.owner] == logged_user["primary_gid"]
    assert (
        Cluster(
            id=row[clusters.c.id],
            name=cluster_data["name"],
            type=row[clusters.c.type],
            endpoint=row[clusters.c.endpoint],
            authentication=row[clusters.c.authentication],
            owner=logged_user["primary_gid"],
            access_rights={logged_user["primary_gid"]: CLUSTER_ADMIN_RIGHTS},
        )
        == created_cluster
    )
    # cleanup
    postgres_db.execute(clusters.delete().where(clusters.c.id == row[clusters.c.id]))

def assign_ds_to_user(eng: sa.engine.Engine, drw_srcs: sa.engine.ResultProxy, u_id):
    relations_to_insert = [
        {"user_id": u_id, "draw_source_id": ds.id} for ds in drw_srcs
    ]
    eng.execute(
        db.user_draw_source_relationship.insert().values(relations_to_insert)
    )

def truncate_table(
    db_engine: sqlalchemy.engine.Engine,
    table: str,
    schema: str = "public",
    cascade: bool = False,
) -> None:
    """Truncate the given table."""
    db_engine.execute(
        f"TRUNCATE TABLE {schema}.{table} {'CASCADE' if cascade else ''}"
    )

def db_api(
    client: TestClient, postgres_db: sa.engine.Engine
) -> Iterator[ProjectDBAPI]:
    assert client.app
    db_api = client.app[APP_PROJECT_DBAPI]
    yield db_api
    # clean the projects
    postgres_db.execute("DELETE FROM projects")

def run_sql(sql_path: t.Union[Path, str], engine: sqlalchemy.engine.Engine):
    sql_path = Path(sql_path)
    if not sql_path.is_file():
        raise ValueError(f"There is no file at path {sql_path!s}")
    with sql_path.open("r") as fd:
        sql = fd.read()
    engine.execute(sql)

def run_sql_file(sql_subpath: Union[str, Path], engine: sqlalchemy.engine.Engine) -> None:
    sql_path = Path(PACKAGE_PATH, "sql", sql_subpath).resolve()
    if not sql_path.is_file():
        raise ValueError(f"There is no file at path {sql_path!s}")
    with sql_path.open("r") as fd:
        sql = fd.read()
    engine.execute(sql)

def setup_guids(engine: sa.engine.Engine) -> None:
    """Set up UUID generation using the pgcrypto extension for postgres."""
    database_backend = DatabaseBackend.from_engine(engine)
    # TODO: Add some way to run postgres-specific tests
    if database_backend == DatabaseBackend.postgresql:  # pragma: no cover
        # noinspection SqlDialectInspection,SqlNoDataSourceInspection
        uuid_generation_setup_query = 'CREATE EXTENSION IF NOT EXISTS "pgcrypto"'
        engine.execute(uuid_generation_setup_query)

def sqlalchemy_hunter_session(
    sqlalchemy_hunter_bind: sa.engine.Engine,
    app: Starlette,
):
    meta = app.state.hunter_service.metadata()
    sqlalchemy_hunter_bind.execute("CREATE SCHEMA " + meta.schema)
    meta.create_all(sqlalchemy_hunter_bind)
    try:
        yield sqlalchemy_hunter_bind
    finally:
        meta.drop_all(sqlalchemy_hunter_bind)
        sqlalchemy_hunter_bind.execute("DROP SCHEMA " + meta.schema)

def cluster(
    postgres_db: sa.engine.Engine, faker: Faker
) -> Iterable[
    Callable[
        [GroupID, Dict[GroupID, ClusterAccessRights]], Coroutine[Any, Any, Cluster]
    ]
]:
    list_of_created_cluster_ids = []

    async def creator(
        gid: GroupID,
        cluster_access_rights: Optional[Dict[GroupID, ClusterAccessRights]] = None,
    ) -> Cluster:
        new_cluster = ClusterCreate(
            **{
                "name": faker.name(),
                "type": random.choice(list(ClusterType)),
                "owner": gid,
                "access_rights": cluster_access_rights or {},
            }
        )
        result = postgres_db.execute(
            clusters.insert()
            .values(new_cluster.dict(by_alias=True, exclude={"id", "access_rights"}))
            .returning(literal_column("*"))
        )
        cluster_in_db = result.first()
        assert cluster_in_db is not None
        new_cluster_id = cluster_in_db[clusters.c.id]
        list_of_created_cluster_ids.append(new_cluster_id)
        # when a cluster is created, the DB automatically creates the owner access rights
        for group_id, access_rights in new_cluster.access_rights.items():
            result = postgres_db.execute(
                insert(cluster_to_groups)
                .values(
                    **{
                        "cluster_id": new_cluster_id,
                        "gid": group_id,
                        "read": access_rights.read,
                        "write": access_rights.write,
                        "delete": access_rights.delete,
                    }
                )
                .on_conflict_do_nothing()
            )
        return Cluster(
            id=new_cluster_id, **new_cluster.dict(by_alias=True, exclude={"id"})
        )

    yield creator

    # clean up
    postgres_db.execute(
        clusters.delete().where(clusters.c.id.in_(list_of_created_cluster_ids))
    )

def _assert_project_db_row(
    postgres_db: sa.engine.Engine, project: Dict[str, Any], **kwargs
):
    row: Optional[Row] = postgres_db.execute(
        f"SELECT * FROM projects WHERE \"uuid\"='{project['uuid']}'"
    ).fetchone()
    expected_db_entries = {
        "type": "STANDARD",
        "uuid": project["uuid"],
        "name": project["name"],
        "description": project["description"],
        "thumbnail": project["thumbnail"],
        "prj_owner": None,
        "workbench": project["workbench"],
        "published": False,
        "access_rights": {},
        "dev": project["dev"],
        "classifiers": project["classifiers"],
        "ui": project["ui"],
        "quality": project["quality"],
        "creation_date": to_datetime(project["creationDate"]),
        "last_change_date": to_datetime(project["lastChangeDate"]),
    }
    expected_db_entries.update(kwargs)
    for k in expected_db_entries:
        assert (
            row[k] == expected_db_entries[k]
        ), f"project column [{k}] does not correspond"
    assert row["last_change_date"] >= row["creation_date"]

async def test_delete_cluster(
    enable_dev_features: None,
    client: TestClient,
    postgres_db: sa.engine.Engine,
    logged_user: Dict[str, Any],
    second_user: Dict[str, Any],
    cluster: Callable[..., Coroutine[Any, Any, Cluster]],
    faker: Faker,
    user_role: UserRole,
    expected: ExpectedResponse,
):
    # deleting a non-existing cluster returns not found
    url = client.app.router["delete_cluster_handler"].url_for(cluster_id=f"{25}")
    rsp = await client.delete(f"{url}")
    data, error = await assert_status(rsp, expected.not_found)
    if error and user_role in [UserRole.ANONYMOUS, UserRole.GUEST]:
        return
    assert data is None

    # creating our own cluster allows us to delete it
    admin_cluster: Cluster = await cluster(GroupID(logged_user["primary_gid"]))
    url = client.app.router["delete_cluster_handler"].url_for(
        cluster_id=f"{admin_cluster.id}"
    )
    rsp = await client.delete(f"{url}")
    data, error = await assert_status(rsp, expected.no_content)
    assert data is None

    # check it was deleted
    result: ResultProxy = postgres_db.execute(
        sa.select([clusters]).where(clusters.c.id == admin_cluster.id)
    )
    assert result.rowcount == 0

def list_dbs(connection: sqlalchemy.engine.Engine):
    """List databases from a connection."""
    response = connection.execute(
        """
        SELECT datname FROM pg_database
        """
    ).fetchall()
    return [r[0] for r in response]

def list_tables(connection: sqlalchemy.engine.Engine, schema: str = "public"):
    """List tables in a database schema."""
    # alternative filter: WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema'
    response = connection.execute(
        f"""
        SELECT tablename FROM pg_catalog.pg_tables
        WHERE schemaname = '{schema}'
        """
    ).fetchall()
    return [r[0] for r in response]

def create_user(eng: sa.engine.Engine):
    user = {
        'name': 'User',
        'email': '*****@*****.**',
        'password': hash_password('qwerty'),
        'phone': '123321123',
    }
    res: sa.engine.ResultProxy = eng.execute(
        db.user.insert().values(user).returning(db.user.c.id)
    )
    return res.first().id

def pandas_delete_and_insert(
    mysql_tables: str, df: pd.DataFrame, engine: sqlalchemy.engine.Engine
):
    """Delete all rows from the named table(s), then insert the dataframe.

    :param mysql_tables: Either a single table name or a "|"-separated list of
        tables that will be inserted into; a table name may carry an
        "@"-suffixed index column used for deduplication
    :type mysql_tables: str
    :param df: Either a single dataframe or one whose column names are split by
        table_name.column_name
    :type df: pandas.DataFrame
    :param engine: SQLAlchemy engine
    :type engine: sqlalchemy.engine.Engine
    """
    mysql_tables = mysql_tables.split("|")
    for mysql_table in mysql_tables:
        # Try to split off the index
        table_name, index_name = (mysql_table.split("@") + [None] * 2)[:2]
        # Go through each table in the array
        query = f"delete from {table_name}"
        engine.execute(query)
        # write to MySQL
        if len(mysql_tables) > 1:
            table_prefix = table_name + "."
            # Filter and remove the table name from the column so it can be written back
            df_tmp = df.filter(like=table_prefix)
            df_tmp.rename(columns=lambda x: str(x)[len(table_prefix):], inplace=True)
            if index_name:
                # Drop anything na, then drop the duplicates if any
                df_tmp.dropna(subset=index_name.split(), inplace=True)
                df_tmp.drop_duplicates(subset=index_name, inplace=True)
        else:
            df_tmp = df
        try:
            df_tmp.to_sql(con=engine, name=table_name, if_exists='append', index=False)
        except Exception:
            logger.exception(f"Error running to_sql on table {table_name}")
            raise

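# Usage sketch for pandas_delete_and_insert (hypothetical table and column
# names, not from the original source). "orders@order_id" targets table
# "orders" and dedupes on its "order_id" column; "items" is wiped and
# re-inserted as-is. With several tables, columns are prefixed per table.
import pandas as pd
import sqlalchemy

engine = sqlalchemy.create_engine("mysql+pymysql://user:pass@localhost/demo")  # assumed DSN
df = pd.DataFrame(
    {
        "orders.order_id": [1, 2, 2],
        "orders.total": [9.99, 5.00, 5.00],
        "items.sku": ["a-1", "b-2", "c-3"],
    }
)
pandas_delete_and_insert("orders@order_id|items", df, engine)
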
def list_schemas(connection: sqlalchemy.engine.Engine):
    """List non-system schemas in a database."""
    response = connection.execute(
        """
        SELECT schema_name FROM information_schema.schemata
        """
    ).fetchall()
    system_schemas = (
        "pg_catalog",
        "information_schema",
        "pg_toast",
        "pg_temp_1",
        "pg_toast_temp_1",
    )
    return [r[0] for r in response if r[0] not in system_schemas]

def create_ds_from_fixtures(eng: sa.engine.Engine, file) -> sa.engine.ResultProxy:
    with open(file) as f:
        data = json.loads(f.read())
    draw_sources_to_insert = []
    for company_name, company_ds in data.items():
        res: sa.engine.ResultProxy = eng.execute(
            db.company.select(db.company.c.name == company_name)
        )
        if res.rowcount == 1:
            company_id = str(res.first().id)
        else:
            res = eng.execute(
                db.company.insert()
                .values(name=company_name)
                .returning(db.company.c.id)
            )
            company_id = str(res.first().id)
        draw_sources_to_insert.extend(
            [{**x, 'company_id': company_id} for x in company_ds]
        )
    created_ds: sa.engine.ResultProxy = eng.execute(
        db.draw_source.insert()
        .values(draw_sources_to_insert)
        .returning(db.draw_source.c.id)
    )
    return created_ds

def list_meta_data(
    connection: sqlalchemy.engine.Engine, table_name: str, schema: str
) -> Tuple[list, list]:
    """List metadata for a table in a particular schema."""
    response = connection.execute(
        f"""
        SELECT
            cols.column_name,
            cols.data_type,
            cols.is_nullable,
            col_description(
                (table_schema || '.' || table_name)::regclass::oid,
                ordinal_position
            ) AS column_comment
        FROM information_schema.columns cols
        WHERE cols.table_schema = '{schema}'
        AND cols.table_name = '{table_name}';
        """
    )
    rows = response.fetchall()
    cols = response.keys()
    return rows, list(cols)

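# Usage sketch for list_meta_data (the engine URL, schema, and table name are
# assumptions, not from the original source): print each column with its
# type, nullability, and comment.
import sqlalchemy

engine = sqlalchemy.create_engine("postgresql://user:pass@localhost/demo")  # assumed DSN
rows, cols = list_meta_data(engine, table_name="users", schema="public")
print(cols)  # ['column_name', 'data_type', 'is_nullable', 'column_comment']
for column_name, data_type, is_nullable, comment in rows:
    print(f"{column_name}: {data_type} (nullable={is_nullable}) -- {comment}")
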
def check_if_table_or_view_exists(
    db_engine: sqlalchemy.engine.Engine,
    table_or_view: str,
    schema: str = 'public',
) -> bool:
    """Check if a table or view exists in the db

    :param db_engine: PostgreSQL db engine
    :param table_or_view: name of the table or view to look up
    :param schema: schema of the table or view
    """
    query = f"""
        SELECT 1
        FROM INFORMATION_SCHEMA.tables
        WHERE table_schema = '{schema}'
        AND table_name = '{table_or_view}'
        UNION ALL
        SELECT 1
        FROM INFORMATION_SCHEMA.views
        WHERE table_schema = '{schema}'
        AND table_name = '{table_or_view}';
    """
    r = db_engine.execute(query)
    return bool(r.fetchall())

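# Usage sketch (hypothetical schema/table names, not from the original
# source; assumes `engine` is a connected sqlalchemy.engine.Engine): guard a
# read against a missing relation.
import pandas as pd

if check_if_table_or_view_exists(engine, "daily_sales", schema="analytics"):
    df = pd.read_sql_table("daily_sales", engine, schema="analytics")
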
def upsert_df(df: pd.DataFrame, table_name: str, engine: sqlalchemy.engine.Engine):
    """Implements the equivalent of pd.DataFrame.to_sql(..., if_exists='update')
    (which does not exist). Creates or updates the db records based on the
    dataframe records. Conflicts that trigger an update are determined by the
    dataframe's index; primary keys on the table are set equal to the index names.

    1. Create a temp table from the dataframe
    2. Insert/update from the temp table into table_name

    Returns: True if successful
    """
    # If the table does not exist, we should just use to_sql to create it
    if not engine.execute(
        f"""SELECT EXISTS (
            SELECT FROM information_schema.tables
            WHERE table_schema = 'public'
            AND table_name = '{table_name}');
        """
    ).first()[0]:
        df.to_sql(table_name, engine)
        return True

    # If it already exists...
    temp_table_name = f"temp_{uuid.uuid4().hex[:6]}"
    df.to_sql(temp_table_name, engine, index=True)

    index = list(df.index.names)
    index_sql_txt = ", ".join([f'"{i}"' for i in index])
    columns = list(df.columns)
    headers = index + columns
    headers_sql_txt = ", ".join(
        [f'"{i}"' for i in headers]
    )  # index1, index2, ..., col1, col2, ...

    # col1 = EXCLUDED.col1, col2 = EXCLUDED.col2, ...
    update_column_stmt = ", ".join([f'"{col}" = EXCLUDED."{col}"' for col in columns])

    # For the ON CONFLICT clause, postgres requires that the columns have a unique constraint
    query_pk = f"""
    ALTER TABLE "{table_name}" ADD CONSTRAINT {table_name}_unique_constraint_for_upsert
    UNIQUE ({index_sql_txt});
    """
    try:
        engine.execute(query_pk)
    except Exception as e:
        # relation "..._unique_constraint_for_upsert" already exists
        if 'unique_constraint_for_upsert" already exists' not in e.args[0]:
            engine.execute(f'DROP TABLE "{temp_table_name}"')
            raise e

    # Compose and execute the upsert query
    query_upsert = f"""
    INSERT INTO "{table_name}" ({headers_sql_txt})
    SELECT {headers_sql_txt} FROM "{temp_table_name}"
    ON CONFLICT ({index_sql_txt}) DO UPDATE
    SET {update_column_stmt};
    """
    try:
        engine.execute(query_upsert)
    except Exception as e:
        logger.error(e)
        engine.execute(f'DROP TABLE "{temp_table_name}"')
        raise e

    engine.execute(f'DROP TABLE "{temp_table_name}"')
    return True

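# Usage sketch for upsert_df (hypothetical table and data, not from the
# original source). The dataframe's index becomes the conflict target, so it
# must be named; re-running with changed values updates rows in place.
import pandas as pd
import sqlalchemy

engine = sqlalchemy.create_engine("postgresql://user:pass@localhost/demo")  # assumed DSN
df = pd.DataFrame(
    {"price": [1.50, 2.00]},
    index=pd.Index(["apple", "pear"], name="sku"),
)
upsert_df(df, "prices", engine)  # first call creates the table
df.loc["apple", "price"] = 1.75
upsert_df(df, "prices", engine)  # second call updates the existing row
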
def insert_into_table(
    table: sqlalchemy.Table,
    db_connection: sqlalchemy.engine.Engine,
    key_value_dict: dict,
):
    assert isinstance(table, sqlalchemy.Table)
    insert_clause = table.insert().values(**key_value_dict)
    db_connection.execute(insert_clause)

def execute_sql(engine: sqlalchemy.engine.Engine, sql: str):
    """Logs and executes a raw SQL string"""
    LOG.info(f'Executing SQL:\n{sql}')
    engine.execute(sql)

def add_sdo_geo_to_table(table_name: str,
                         wkt_geo_column: str,
                         geo_sdo_column: str,
                         eng: sa.engine.Engine,
                         is_wkt=True,
                         no_asserts=False,
                         dispose_eng=False):
    """
    Adds a separate SDO_GEOMETRY column from an existing wkt/wkb column

    Args:
        table_name: The table we're working on
        wkt_geo_column: The name of the column containing the wkt/wkb geo
        geo_sdo_column: The name of the column we want to store the sdo_geometry object in
        eng: An engine object connected to the db
        is_wkt: True if wkt_geo_column contains data in wkt format (otherwise in wkb)
        no_asserts: True if no asserts on columns are made (will override existing data)
        dispose_eng: Whether to dispose of the engine after the function

    Returns:
        None
    """
    df = get_df(f"SELECT * FROM {table_name} WHERE ROWNUM < 1", eng)  # fetch only one row
    if not no_asserts:
        assert wkt_geo_column in df.columns, f"{wkt_geo_column} not in table {table_name}"
        assert geo_sdo_column not in df.columns, f"{geo_sdo_column} already in table {table_name}"
    if geo_sdo_column not in df.columns and wkt_geo_column in df.columns:
        eng.execute(f"""
            ALTER TABLE {table_name}
            ADD {geo_sdo_column} SDO_GEOMETRY
        """)
        eng.execute("COMMIT")

    # run for each feature separately
    feature_names = pd.read_sql(
        f"select distinct {FEATURE_NAME} from {table_name}", eng).iloc[:, 0]
    conn = eng.raw_connection()
    cur = conn.cursor()

    def add_sdo(feature_name):
        select_sdo_geo = f"""select SDO_GEOMETRY({wkt_geo_column}, 4326) as {geo_sdo_column},
                ROWID as rid
            from {table_name}
            where {geo_sdo_column} IS NULL and {FEATURE_NAME} = '{feature_name}'
        """
        # TIP: when using weird SDO_UTIL functions it is better to use the raw
        # connection; in this case the merge into returned no values except
        # through the raw cursor.
        cur.execute(f"""
            merge into {table_name} curr
            using ({select_sdo_geo}) tmp
            on (curr.ROWID = tmp.rid)
            when matched then update set curr.{geo_sdo_column} = tmp.{geo_sdo_column}
        """)
        conn.commit()

    for feature_name in tqdm(feature_names, desc='adding SDO to features', unit='feature'):
        add_sdo(feature_name)
    cur.close()

    # fix coordinate system
    eng.execute(
        f"update {table_name} T set T.{geo_sdo_column}.SDO_SRID = 4326 "
        f"WHERE T.{geo_sdo_column} is not null"
    )

    # add spatial index and register in the user_sdo_geom_metadata table
    usersdo_df = get_df("SELECT * FROM user_sdo_geom_metadata", eng)
    if (table_name, geo_sdo_column) not in [
            tuple(row) for row in usersdo_df[['TABLE_NAME', 'COLUMN_NAME']].values
    ]:
        eng.execute(f"""
            INSERT INTO user_sdo_geom_metadata VALUES (
                '{table_name}', '{geo_sdo_column}',
                sdo_dim_array(sdo_dim_element('X', -100, 100, 0.000005),
                              sdo_dim_element('Y', -100, 100, 0.000005)),
                4326)
        """)
    is_there_index = len(
        eng.execute(f"""
            select index_name from SYS.ALL_INDEXES where table_name = '{table_name}'
        """).fetchall()) > 0
    if not is_there_index:
        # first letter of each word in the column name
        acronym_short_geo_sdo = ''.join([s[0] for s in geo_sdo_column.split('_')])
        eng.execute(f"""
            CREATE INDEX {table_name}_{acronym_short_geo_sdo}_idx
            ON {table_name} ({geo_sdo_column})
            INDEXTYPE IS MDSYS.SPATIAL_INDEX
        """)
    if dispose_eng:
        eng.dispose()

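# Usage sketch for add_sdo_geo_to_table (the Oracle DSN, table, and column
# names below are assumptions, not from the original source): derive an
# SDO_GEOMETRY column from an existing WKT column.
import sqlalchemy as sa

eng = sa.create_engine("oracle+cx_oracle://user:pass@host:1521/?service_name=orcl")  # assumed DSN
add_sdo_geo_to_table(
    table_name="ROADS",
    wkt_geo_column="GEO_WKT",
    geo_sdo_column="GEO_SDO",
    eng=eng,
    is_wkt=True,
    dispose_eng=True,  # dispose the engine when done
)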