def add_index(engine: Engine, table_name: str, column: sa.Column):
    """Create an index based on the column index definition,
    calling the compiled SQL statement:

        CREATE INDEX index_name ON table_name (column_name)

    :param engine: the bound sql database engine
    :param table_name: the name of the table with the column
    :param column: the instantiated column definition
    :return: - nothing -
    """
    c_table_name = _compile_name(table_name, dialect=engine.dialect)
    c_column_name = column.compile(dialect=engine.dialect)
    index_name = "ix_%s_%s" % (table_name, column.name)
    c_index_name = _compile_name(index_name, dialect=engine.dialect)
    engine.execute('CREATE INDEX %s ON %s ( %s )' %
                   (c_index_name, c_table_name, c_column_name))
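# Hedged usage sketch for add_index (not from the original source). It assumes a
# SQLAlchemy 1.x-style engine (engine.execute), an in-memory SQLite database, and
# that _compile_name quotes identifiers for the active dialect as the function
# above expects. Table and column names are illustrative only.
import sqlalchemy as sa

engine = sa.create_engine("sqlite:///:memory:")
metadata = sa.MetaData()
users = sa.Table(
    "users", metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("email", sa.String(255)),
)
metadata.create_all(engine)

# Builds and executes: CREATE INDEX ix_users_email ON users ( email )
add_index(engine, "users", users.c.email)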
def __to_db(self, dataframe: DataFrame, conn: Engine, params, **kwargs) -> None:
    table_name = params.get("table_name")
    batch_size = params.get("batch_size")
    mode = params.get("mode", 'append')
    index_flag = params.get("index")
    index_label = params.get("index_label")
    try:
        if mode == 'truncate' and conn.has_table(table_name=table_name):
            # 'autocommit' (lowercase) is the execution option SQLAlchemy recognises
            conn.execution_options(autocommit=True)\
                .execute(f"""TRUNCATE TABLE {table_name}""")
        dataframe.to_sql(con=conn,
                         name=table_name,
                         if_exists=self.modes.get(mode),
                         chunksize=batch_size,
                         index=index_flag,
                         index_label=index_label,
                         **kwargs)
    except Exception as err:
        msg = ("Error: Check your credentials (username,"
               " password, host, port, database)\n")
        raise ValueError(msg, err)
def __init__(self, pool, dialect, u, single_worker=True, **kwargs):
    if single_worker:
        worker = ExecutorThread()
    else:
        worker = None
    self._worker = worker
    self._engine = Engine(pool, dialect, u, **kwargs)
def iterate_csv_dir(db_engine: engine.Engine = None):
    current_dir = os.getcwd()
    db_config = config.DB()
    for _, services_dir, _ in os.walk(f'{current_dir}/csv'):
        for service_db in services_dir:
            db_engine = create_engine(db_config.print())
            db_connection = db_engine.raw_connection()
            try:
                db_cursor = db_connection.cursor()
                db_cursor.execute(
                    f"SELECT 1 FROM pg_catalog.pg_database WHERE datname = '{service_db}'"
                )
                db_exists = db_cursor.fetchone()
                if not db_exists:
                    db_cursor.execute(f'CREATE DATABASE {service_db}')
                db_cursor.close()
                db_connection.commit()
            finally:
                db_connection.close()
                db_engine.dispose()
            db_engine = create_engine(db_config.print(service_db))
            for filename in os.listdir(f'{current_dir}/csv/{service_db}'):
                filename = os.path.splitext(filename)[0]
                from_csv_to_db(service_db, filename, db_engine)
            db_engine.dispose()
def execute(engine: Engine, sql: str) -> None:
    log.debug(sql)
    if _print_not_execute:
        print(format_sql_for_print(sql) + "\n;")  # extra \n in case the SQL ends in a comment
    else:
        engine.execute(sql)
def delete(cls, db_engine: engine.Engine, email: str):
    query = """
        DELETE FROM {}.{}
        WHERE email = '{}'
    """.format(PATT_SCHEMA_NAME, cls.__tablename__, email)
    db_engine.execute(query)
def __init__(self, pool, dialect, url, logging_name=None, echo=None,
             execution_options=None, loop=None, **kwargs):
    self._engine = Engine(pool, dialect, url, logging_name=logging_name,
                          echo=echo, execution_options=execution_options,
                          **kwargs)
    self._loop = loop

    max_workers = None

    # https://www.python.org/dev/peps/pep-0249/#threadsafety
    if dialect.dbapi.threadsafety < 2:
        # This might seem overly-restrictive, but when we instantiate an
        # AsyncioResultProxy from AsyncioEngine.execute, subsequent
        # fetchone calls could be in different threads. Let's limit to one.
        max_workers = 1

    self._engine_executor = ThreadPoolExecutor(max_workers=max_workers)
def create_tables_if_not_existing(engine: Engine):
    metadata = MetaData()
    metadata.bind = engine
    tablename_jira_issue_created = 'JiraIssueCreated'
    tablename_jira_issue_updated = 'JiraIssueUpdated'
    try:
        if not (engine.has_table(tablename_jira_issue_created)
                and engine.has_table(tablename_jira_issue_updated)):
            issue_created = Table(
                tablename_jira_issue_created, metadata,
                Column('issue', String(32), primary_key=True, nullable=False),
                Column('created', TIMESTAMP, nullable=False))
            issue_updated = Table(
                tablename_jira_issue_updated, metadata,
                Column('id', BIGINT(), Sequence('id', start=1, increment=1),
                       primary_key=True),
                Column('issue', String(32), ForeignKey(issue_created.c.issue),
                       nullable=False),
                Column('updated', TIMESTAMP, nullable=False),
                Column('issue_status', String(32)),
                Column('customer', String(200)))
            metadata.create_all()
    finally:
        engine.dispose()
def upsert_admin(cls, db_engine: engine.Engine) -> None:
    db_config = Config()
    query = """
        INSERT INTO {0}.{1}
            (email, password, status, role, created_at, updated_at)
        VALUES
            ('{2}', '{3}', '{4}', '{5}', '{6}', '{7}')
        ON CONFLICT (email) DO UPDATE SET
            email = excluded.email,
            password = excluded.password,
            status = excluded.status,
            role = excluded.role,
            updated_at = excluded.updated_at
    """.format(
        PATT_SCHEMA_NAME,
        cls.__tablename__,
        db_config.admin_email,
        Bcrypt().generate_password_hash(db_config.admin_password).decode("utf-8"),
        UserStatuses.confirmed,
        Roles.admin,
        datetime.utcnow(),
        datetime.utcnow(),
    )
    db_engine.execute(query)
def check_or_update_odc_schema(engine: Engine):
    """
    Check that the ODC schema is updated enough to run Explorer,
    and either update it safely (if we have permission), or tell the user how.
    """
    # We need the `update` column on ODC's dataset table in order to run incremental product refreshes.
    try:
        # We can try to install it ourselves if we have permission, using ODC's code.
        if not pg_column_exists(engine, ODC_DATASET.fullname, "updated"):
            _LOG.warn("schema.applying_update.add_odc_change_triggers")
            _utils.install_timestamp_trigger(engine)
    except ProgrammingError as e:
        # We don't have permission.
        raise SchemaNotRefreshable(
            dedent("""
            Missing update triggers.

            No dataset-update triggers are installed on the ODC instance,
            and Explorer does not have enough permissions to add them itself.

            It's recommended to run `datacube system init` on your ODC instance to install them.

            Then try this again.
            """)) from e

    # Add optional indexes to AGDC if we have permission.
    # (otherwise we warn the user that it may be slow, and how to add it themselves)
    statements = []
    try:
        if not pg_index_exists(engine, ODC_DATASET.schema, ODC_DATASET.name,
                               "ix_dataset_added"):
            _LOG.warn("schema.applying_update.add_odc_added_index")
            statements.append(
                f"create index ix_dataset_added on {ODC_DATASET.fullname}(added desc);"
            )
        if not pg_index_exists(engine, ODC_DATASET.schema, ODC_DATASET.name,
                               "ix_dataset_type_changed"):
            _LOG.warn("schema.applying_update.add_odc_changed_index")
            statements.append(
                f"create index ix_dataset_type_changed on "
                f"{ODC_DATASET.fullname}(dataset_type_ref, greatest(added, updated, archived) desc);"
            )
        while statements:
            engine.execute(statements[-1])
            statements.pop()
    except ProgrammingError:
        unexecuted_sql = "\n    ".join(statements)
        warnings.warn(
            dedent(f"""
            No recently-added index.

            Explorer recommends adding an index for recently-added datasets to your ODC,
            but does not have permission to add it to the current ODC database.

            It's recommended to add it manually in Postgres:

            {unexecuted_sql}
            """))
        raise
def drop_all(engine: Engine) -> None:
    from quiz_bot.db.base import metadata

    click.echo('Dropping schema...')
    for table in metadata.tables:
        engine.execute(f'DROP TABLE IF EXISTS "{table}" CASCADE')
    metadata.drop_all()
    click.echo('Schema successfully dropped!')
def update_schema(engine: Engine):
    """Update the schema if needed."""
    if not pg_column_exists(engine, f"{CUBEDASH_SCHEMA}.product", "fixed_metadata"):
        _LOG.info("schema.applying_update.add_fixed_metadata")
        engine.execute(f"""
            alter table {CUBEDASH_SCHEMA}.product
            add column fixed_metadata jsonb
        """)
def establish_connection(bind: engine.Engine) -> engine.Engine:
    for _ in range(100):
        try:
            bind.connect()
            break
        except exc.OperationalError:
            time.sleep(0.05)
    return bind
def does_postgres_accept_connection(engine: Engine) -> bool:
    """
    Test whether the target PostgreSQL database accepts connections
    """
    try:
        engine.connect()
    except OperationalError:
        return False
    else:
        return True
def create_index(engine: Engine, directory: str):
    try:
        print("Indexing...")
        engine.execute(
            f"CREATE INDEX {directory}_index ON {directory} (term, year)")
        print(f"Index created for {directory}")
    except NoSuchTableError:
        print(
            f"Index creation failed because table '{directory}' does not exist. "
            f"Check if directory '{directory}' is empty.")
def delete_trigger(name: str, engine: Engine, table: str, schema: str = None):
    """Delete a trigger (if it exists) from the database.

    Args:
        name (str): The trigger name.
        engine (Engine): The SQLAlchemy active database engine.
        table (str): The table name.
        schema (str): The table schema that the trigger is attached to.
    """
    schema = schema or 'public'

    engine.execute(f'DROP TRIGGER IF EXISTS {name} ON {schema}.{table}')
def wait_for_postgres(engine: Engine, max_waiting_time: int = 10):
    logging.info('Waiting until PostgreSQL accepts connections')
    for i in range(max_waiting_time):
        if does_postgres_accept_connection(engine):
            logging.info('PostgreSQL is ready to accept connections')
            return
        logging.info(
            'PostgreSQL is not ready to accept connections, waiting {} more seconds'
            .format(max_waiting_time - i))
        sleep(1)
    engine.connect()  # Raise exception
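# Hedged usage sketch (not part of the original module): waits for a database to
# come up before the application starts. The connection URL is a placeholder;
# wait_for_postgres polls does_postgres_accept_connection once per second and
# finally calls engine.connect() so the underlying error surfaces if it never
# becomes reachable.
from sqlalchemy import create_engine

engine = create_engine("postgresql://user:password@localhost:5432/mydb")  # hypothetical DSN
wait_for_postgres(engine, max_waiting_time=30)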
def change_column_length(table: Table, column: Column, length: int,
                         engine: Engine) -> None:
    """Change the column length in the supplied table."""
    if column.type.length < length:
        print("Changing length of {} from {} to {}".format(
            column, column.type.length, length))
        column.type.length = length
        column_name = column.name
        column_type = column.type.compile(engine.dialect)
        engine.execute(
            'ALTER TABLE {table} ALTER COLUMN {column_name} TYPE {column_type}'
            .format(**locals()))
def _insert_data(engine: Engine, table: Union[Table, Base],
                 buffer: List[Dict[str, Any]]) -> None:
    """
    Inserts all records stored in buffer into the specified table using the
    specified engine. Does nothing if buffer is empty.

    :param engine: Database engine.
    :param table: Database table, records are inserted into.
    :param buffer: List of new data to be inserted.
    """
    if len(buffer) > 0:
        if isinstance(table, Table):
            engine.execute(table.insert(), buffer)
        else:
            engine.execute(table.__table__.insert(), buffer)
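# Hedged usage sketch for _insert_data, assuming a simple SQLAlchemy Core table
# and a 1.x-style engine; the engine URL and table definition are illustrative.
import sqlalchemy as sa

engine = sa.create_engine("sqlite:///:memory:")
metadata = sa.MetaData()
events = sa.Table(
    "events", metadata,
    sa.Column("id", sa.Integer, primary_key=True),
    sa.Column("name", sa.String(64)),
)
metadata.create_all(engine)

# Each dict maps column names to values; an empty list is a no-op.
_insert_data(engine, events, [{"name": "created"}, {"name": "updated"}])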
def get_tables(connection: Engine) -> pd.DataFrame:
    dfs = []
    connection.execute(f"USE WAREHOUSE {warehouse};")
    query = ("SELECT TABLE_CATALOG, TABLE_SCHEMA, "
             "concat(TABLE_CATALOG,'_', TABLE_SCHEMA) as SCHEMA_ID, "
             "TABLE_NAME, concat(schema_id,'_',TABLE_NAME) as TABLE_ID, "
             "ROW_COUNT, CREATED, LAST_ALTERED "
             "FROM information_schema.TABLES "
             "WHERE TABLE_SCHEMA NOT IN ('PUBLIC', 'INFORMATION_SCHEMA');")
    for db in DATABASES:
        connection.execute(f"USE DATABASE {db};")
        df = pd.read_sql(query, connection)
        dfs.append(df)
    df = pd.concat(dfs, ignore_index=True)
    return df
def load_charges(engine: Engine, npartitions: int = None,
                 clear_existing: bool = False):
    ddf = read_raw_data(npartitions)
    ddf = clean_data(ddf)

    if clear_existing:
        logger.info("Clearing any existing expungement data")
        for table in [runs, charges, features, outcomes]:
            logger.info(f"Deleting from: {table.name}")
            engine.execute(f"""
                DELETE FROM {table.name}
            """)

    load_to_db(ddf, target_table=charges, engine=engine, include_index=False)
def column_reflection_fallback(
    selectable: Select, dialect: Dialect, sqlalchemy_engine: Engine
) -> List[Dict[str, str]]:
    """If we can't reflect the table, use a query to at least get column names."""
    col_info_dict_list: List[Dict[str, str]]
    if dialect.name.lower() == "mssql":
        # Get column names and types from the database
        # Reference: https://dataedo.com/kb/query/sql-server/list-table-columns-in-database
        columns_query: str = f"""
            SELECT
                SCHEMA_NAME(tab.schema_id) AS schema_name,
                tab.name AS table_name,
                col.column_id AS column_id,
                col.name AS column_name,
                t.name AS column_data_type,
                col.max_length AS column_max_length,
                col.precision AS column_precision
            FROM sys.tables AS tab
            INNER JOIN sys.columns AS col
                ON tab.object_id = col.object_id
            LEFT JOIN sys.types AS t
                ON col.user_type_id = t.user_type_id
            WHERE tab.name = '{selectable}'
            ORDER BY schema_name, table_name, column_id
        """
        col_info_query: TextClause = sa.text(columns_query)
        col_info_tuples_list: List[tuple] = sqlalchemy_engine.execute(
            col_info_query).fetchall()
        # type_module = _get_dialect_type_module(dialect=dialect)
        col_info_dict_list = [
            {
                "name": column_name,
                # "type": getattr(type_module, column_data_type.upper())(),
                "type": column_data_type.upper(),
            }
            for schema_name, table_name, column_id, column_name, column_data_type,
                column_max_length, column_precision in col_info_tuples_list
        ]
    else:
        query: Select = sa.select([sa.text("*")]).select_from(selectable).limit(1)
        result_object = sqlalchemy_engine.execute(query)
        # noinspection PyProtectedMember
        col_names: List[str] = result_object._metadata.keys
        col_info_dict_list = [{"name": col_name} for col_name in col_names]
    return col_info_dict_list
def _check_usage_date_ranges(self, engine: Engine) -> Any:
    query = """
        select
            min(query_start_time) as min_time,
            max(query_start_time) as max_time
        from snowflake.account_usage.access_history
    """
    with PerfTimer() as timer:
        try:
            for db_row in engine.execute(query):
                if len(db_row) < 2 or db_row[0] is None or db_row[1] is None:
                    self.warn(
                        logger,
                        "check-usage-data",
                        f"Missing data for access_history {db_row} - "
                        "Check if using Enterprise edition of Snowflake",
                    )
                    continue
                self.report.min_access_history_time = db_row[0].astimezone(
                    tz=timezone.utc
                )
                self.report.max_access_history_time = db_row[1].astimezone(
                    tz=timezone.utc
                )
                self.report.access_history_range_query_secs = round(
                    timer.elapsed_seconds(), 2
                )
        except Exception as e:
            self.error(logger, "check-usage-data", f"Error was {e}")
def _populate_missing_dataset_extents(engine: Engine, product: DatasetType):
    query = (
        postgres.insert(DATASET_SPATIAL)
        .from_select(
            [
                "id",
                "dataset_type_ref",
                "center_time",
                "footprint",
                "region_code",
                "size_bytes",
                "creation_time",
            ],
            _select_dataset_extent_query(product),
        )
        .on_conflict_do_nothing(index_elements=["id"])
    )

    _LOG.debug(
        "spatial_insert_query.start",
        product_name=product.name,
        # query_sql=as_sql(query),
    )
    inserted = engine.execute(query).rowcount
    _LOG.debug("spatial_insert_query.end", product_name=product.name, inserted=inserted)
    return inserted
def datasets_by_region(
    engine: Engine,
    index: Index,
    product_name: str,
    region_code: str,
    time_range: Range,
    limit: int,
    offset: int = 0,
) -> Generator[Dataset, None, None]:
    product = index.products.get_by_name(product_name)
    query = (
        select(postgres_api._DATASET_SELECT_FIELDS)
        .select_from(
            DATASET_SPATIAL.join(DATASET, DATASET_SPATIAL.c.id == DATASET.c.id))
        .where(DATASET_SPATIAL.c.region_code == bindparam("region_code", region_code))
        .where(DATASET_SPATIAL.c.dataset_type_ref == bindparam("dataset_type_ref", product.id))
    )
    if time_range:
        query = query.where(
            DATASET_SPATIAL.c.center_time > bindparam("from_time", time_range.begin)
        ).where(
            DATASET_SPATIAL.c.center_time < bindparam("to_time", time_range.end)
        )
    query = (
        query.order_by(DATASET_SPATIAL.c.center_time)
        .limit(bindparam("limit", limit))
        .offset(bindparam("offset", offset))
    )
    return (
        index.datasets._make(res, full_info=True)
        for res in engine.execute(query).fetchall()
    )
def get_page_views(engine: Engine, mode: str = 'current') -> Union[int, None]:
    """Get page views for the current date or for all dates.

    :param engine: SQLAlchemy engine object
    :param mode: page view aggregation method ('current', 'all')
    """
    # SQL query result placeholder
    result = []

    # get page views from database
    with engine.connect() as conn:
        if mode == 'all':
            result = conn.execute('SELECT total(page_views) FROM stats')
        elif mode == 'current':
            result = conn.execute('SELECT page_views FROM stats'
                                  ' WHERE tick_date = ?', date_now())

        # unpack results into list of JSON records
        result = [dict(row) for row in result]

    # check results and return sanitized value
    if len(result) > 0:
        return int(list(result[0].values())[0])
    else:
        return None
def load_svt_extra_db(
    engine: Engine, svtExtras: list[MstSvtExtra]
) -> None:  # pragma: no cover
    svtExtra_db_data = [svtExtra.dict() for svtExtra in svtExtras]
    with engine.begin() as conn:
        recreate_table(conn, mstSvtExtra)
        conn.execute(mstSvtExtra.insert(), svtExtra_db_data)
def populate(engine: Engine, data_path: str):
    conn = engine.raw_connection()
    cursor = conn.cursor()

    # keep
    insert_reviews = """
        INSERT INTO review (ID, username, movie_id, review, rating)
        VALUES (?, ?, ?, ?, ?)"""
    cursor.executemany(insert_reviews,
                       generic_generator(os.path.join(data_path, 'reviews.csv')))

    # keep
    insert_users = """
        INSERT INTO users (ID, username, password)
        VALUES (?, ?, ?)"""
    cursor.executemany(insert_users,
                       generic_generator(os.path.join(data_path, 'users.csv')))

    # keep
    insert_movies = """
        INSERT INTO movies (
            Rank, Title, Genre, Description, Director, Actors, Year,
            Runtime, Rating, Votes, Revenue, Metascore)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
    cursor.executemany(insert_movies,
                       generic_generator(os.path.join(data_path, 'Data1000Movies.csv')))
    # sqlite3.OperationalError: near "(": syntax error

    conn.commit()
    conn.close()
def write_to_db(self, eng: Engine):
    conn = eng.raw_connection()
    cursor = conn.cursor()
    try:
        cursor.callproc("create_class_section",
                        [self.crn, self.class_dept, self.class_number,
                         self.professor, self.capacity, self.registered,
                         self.semester_id])
        for meeting_time in self.meeting_times:
            meeting_time.write_to_db(cursor)
        for restriction in self.restrictions:
            restriction.write_to_db(cursor)
    except err.IntegrityError as e:
        code = e.args[0]
        if code == 1062:
            print("IGNORING.")
        else:
            conn.rollback()
            print("ERROR CREATING SECTION", e)
    except err.InternalError as e:
        conn.rollback()
        print("ERROR CREATING SECTION", e)
    finally:
        conn.commit()
        cursor.close()
        conn.close()
def get_posts(engine: Engine, post_id: int = None) -> list:
    """Get all blog posts as a list of table records.

    :param engine: SQLAlchemy engine object
    :param post_id: blog entry 'id' (optional)
    :return: list of post records
    """
    with engine.connect() as conn:
        if post_id is not None:
            result = conn.execute('SELECT * FROM entries WHERE id = ?', post_id)
        else:
            result = conn.execute('SELECT * FROM entries')

        # unpack results into list of JSON records
        posts = [dict(row) for row in result]

    # data correctly retrieved
    if len(posts) > 0:
        # unpack post 'tags'
        for i in range(len(posts)):
            posts[i]['tags'] = posts[i]['tags'].split(',')

    return posts
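# Hedged usage sketch for get_posts (database file and schema are assumptions,
# not from the original source): it presumes a SQLite database with an `entries`
# table. The '?' placeholder style works here because the raw SQL string is
# passed straight through to the sqlite3 driver, whose paramstyle is qmark.
from sqlalchemy import create_engine

engine = create_engine("sqlite:///blog.db")  # hypothetical database file
all_posts = get_posts(engine)
single_post = get_posts(engine, post_id=1)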
def check_database(engine: Engine, user_name: pwd.struct_passwd,
                   tables: Iterable[Table]):
    logger.info("Checking database access as user %s", user_name)
    try:
        conn = engine.connect()
    except DBAPIError as e:
        logger.critical("Could not connect to database as %s: %s",
                        user_name, e)
        raise
    with contextlib.closing(conn):
        for table in tables:
            try:
                check_table(conn, table)
            except DBAPIError as e:
                logger.critical("Query check for table %s as user %s failed: "
                                "%s", table.name, user_name, e)
                raise
def get_revision(
    config: Config,
    engine: Engine,
    script: ScriptDirectory,
    revision_type='current'
) -> str:
    """Helper to get a revision id."""
    with engine.connect() as conn:
        with EnvironmentContext(config, script) as env_context:
            env_context.configure(conn, version_table="migrate_version")
            if revision_type == 'head':
                revision = env_context.get_head_revision()
            else:
                migration_context = env_context.get_context()
                revision = migration_context.get_current_revision()
    return revision
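# Hedged usage sketch for get_revision (paths and DSN are illustrative): builds
# the Alembic Config and ScriptDirectory that the helper expects, then compares
# the database's current revision against the head revision of the scripts.
from alembic.config import Config
from alembic.script import ScriptDirectory
from sqlalchemy import create_engine

config = Config("alembic.ini")  # hypothetical config path
script = ScriptDirectory.from_config(config)
engine = create_engine("postgresql://user:password@localhost/mydb")  # hypothetical DSN

current = get_revision(config, engine, script)
head = get_revision(config, engine, script, revision_type='head')
needs_upgrade = current != head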