def init_db(engine: Engine, *, debug: bool, message: str = "hmm"):
    """Create all tables bound to ``metadata`` on *engine*, then dispose
    of the engine's connection pool.

    When *debug* is true, SQL echo logging is enabled on the engine first.
    """
    if debug:
        log.instance_logger(engine, echoflag=True)
    metadata.create_all(bind=engine)
    engine.dispose()
    print(f"** init_db {debug=} {message=} **")
def __init__(self, pool, dialect, url, reactor=None, thread_pool=None, **kwargs):
    """Wrap a SQLAlchemy Engine so blocking calls can run on a thread pool.

    Raises TypeError when no *reactor* is supplied.
    """
    if reactor is None:
        raise TypeError("Must provide a reactor")
    self._engine = Engine(pool, dialect, url, **kwargs)
    self._reactor = reactor
    # Fall back to the reactor's own pool when none was supplied.
    self._tpool = thread_pool if thread_pool is not None else reactor.getThreadPool()
def create_database(database_engine: Engine, database_name: Text):
    """Try to connect to the database. Create it if it does not exist.

    Fix: the successful probe connection was previously leaked, and the
    fallback engine was never disposed.
    """
    try:
        conn = database_engine.connect()
    except sa.exc.OperationalError:
        # NOTE(review): the fallback URL is SQLite, which has no
        # CREATE DATABASE statement -- this path looks intended for a
        # server backend (e.g. PostgreSQL); confirm the URL is correct.
        default_db_url = f"sqlite:///{database_name}.db"
        default_engine = sa.create_engine(default_db_url)
        conn = default_engine.connect()
        try:
            conn.execute("commit")  # leave the implicit transaction before DDL
            conn.execute(f"CREATE DATABASE {database_name}")
        finally:
            conn.close()
            default_engine.dispose()
    else:
        conn.close()  # fix: probe connection was leaked on success
def execute_query(engine: Engine, query: str) -> List[Tuple[Any]]:
    """Execute a DB query, always releasing the connection and engine.

    Fix: if ``engine.connect()`` raised, the original ``finally`` block
    referenced an undefined ``connection`` (NameError masking the real
    error), and ``results`` could be unbound at ``return``.

    :param engine: engine to run the query on
    :param query: raw SQL string
    :return: all result rows
    """
    logging.warning(f"Running query on Snowflake: \n{query}")
    connection = engine.connect()
    try:
        results = connection.execute(query).fetchall()
    finally:
        connection.close()
        engine.dispose()
    return results
def __init__(self, pool, dialect, url, reactor=None, create_worker=_threaded_worker, **kwargs):
    """Wrap a SQLAlchemy Engine, running blocking calls on dedicated workers.

    Raises TypeError when no *reactor* is supplied.
    """
    if reactor is None:
        raise TypeError("Must provide a reactor")
    self._engine = Engine(pool, dialect, url, **kwargs)
    self._reactor = reactor
    self._create_worker = create_worker
    # One dedicated worker for engine-level operations.
    self._engine_worker = self._create_worker()
def coverage_get_total(self, engine: Engine, spider: str, table: str) -> int:
    """Return the total number of valid entries for *spider* in *table*
    that the extractor would process.
    """
    sql_string = f"SELECT count(*) FROM {table} WHERE {self.get_database_selection_string(spider, table)}"
    # Fix: pass the engine itself -- pandas then manages the connection
    # lifecycle, whereas `engine.connect()` here leaked an open connection.
    return pd.read_sql(sql_string, engine)["count"][0]
def __init__(self, pool, dialect, url, reactor=None, **kwargs):
    """Wrap a SQLAlchemy Engine for use with a Twisted reactor.

    Raises TypeError when no *reactor* is supplied.
    """
    if reactor is None:
        raise TypeError("Must provide a reactor")
    super(TwistedEngine, self).__init__()
    self._engine = Engine(pool, dialect, url, **kwargs)
    self._reactor = reactor
def initialize_sql(engine: Engine, drop_all: bool = False) -> scoped_session:
    """Bind the scoped session and metadata to *engine*, (re)create the
    schema, run configured populators, and return the session factory.

    When *drop_all* is true (or KOTTI_TEST_DB_STRING is set), all existing
    tables are reflected and dropped first.
    """
    DBSession.registry.clear()
    DBSession.configure(bind=engine)
    metadata.bind = engine
    if drop_all or os.environ.get('KOTTI_TEST_DB_STRING'):
        # Reflect first so tables created outside this metadata get dropped too.
        metadata.reflect()
        metadata.drop_all(engine)
    # Allow users of Kotti to cherry pick the tables that they want to use:
    settings = _resolve_dotted(get_settings())
    tables = settings['kotti.use_tables'].strip() or None
    if tables:
        tables = [metadata.tables[name] for name in tables.split()]
    _adjust_for_engine(engine)
    # Allow migrations to set the 'head' stamp in case the database is
    # initialized freshly:
    if not engine.table_names():
        stamp_heads()
    metadata.create_all(engine, tables=tables)
    if os.environ.get('KOTTI_DISABLE_POPULATORS', '0') not in TRUE_VALUES:
        for populate in settings['kotti.populators']:
            populate()
    commit()
    return DBSession
def save_data_to_database(self, df: pd.DataFrame, engine: Engine):
    """Persist the judgments of each decision in *df* to 'judgment_map'."""
    # Without write privilege, dump the frame as JSON into a per-user
    # directory instead of touching the database.
    if not AbstractPreprocessor._check_write_privilege(engine):
        AbstractPreprocessor.create_dir(self.output_dir, os.getlogin())
        path = Path.joinpath(self.output_dir, os.getlogin(),
                             datetime.now().isoformat() + '.json')
        with path.open("a") as f:
            df.to_json(f)
        return
    for idx, row in df.iterrows():
        with engine.connect() as conn:
            # NOTE(review): table is re-reflected on every row; hoisting this
            # out of the loop looks safe -- confirm before changing.
            t = Table('judgment_map', MetaData(), autoload_with=engine)
            if row['judgments']:  # only insert, when we find judgments
                # Delete and reinsert as no upsert command is available
                # NOTE(review): the delete condition is built from the whole
                # df, so all of df's decisions are re-deleted on every row --
                # confirm this repetition is intended.
                stmt = t.delete().where(delete_stmt_decisions_with_df(df))
                conn.execute(stmt)
                for k in row['judgments']:
                    judgment_type_id = Judgment(k).value
                    stmt = t.insert().values([{
                        "decision_id": str(row['decision_id']),
                        "judgment_id": judgment_type_id
                    }])
                    conn.execute(stmt)
            else:
                self.logger.warning(
                    f"No judgments found for {row['html_url']}")
def initialize_sql(engine: Engine, drop_all: bool = False) -> scoped_session:
    """Bind the scoped session and metadata to *engine*, (re)create the
    schema, run configured populators, and return the session factory.

    When *drop_all* is true (or KOTTI_TEST_DB_STRING is set), all existing
    tables are reflected and dropped first.
    """
    DBSession.registry.clear()
    DBSession.configure(bind=engine)
    metadata.bind = engine
    if drop_all or os.environ.get("KOTTI_TEST_DB_STRING"):
        # Reflect first so tables created outside this metadata get dropped too.
        metadata.reflect()
        metadata.drop_all(engine)
    # Allow users of Kotti to cherry pick the tables that they want to use:
    settings = _resolve_dotted(get_settings())
    tables = settings["kotti.use_tables"].strip() or None
    if tables:
        tables = [metadata.tables[name] for name in tables.split()]
    _adjust_for_engine(engine)
    # Allow migrations to set the 'head' stamp in case the database is
    # initialized freshly:
    if not engine.table_names():
        stamp_heads()
    metadata.create_all(engine, tables=tables)
    if os.environ.get("KOTTI_DISABLE_POPULATORS", "0") not in TRUE_VALUES:
        for populate in settings["kotti.populators"]:
            populate()
    commit()
    return DBSession
def create_extension(bind: Engine, extension: str) -> None:
    """Setup an extension in postgresql.

    Fix: the connection was leaked when ``execute`` raised.
    """
    logger.info('[plenario] Create extension %s' % extension)
    connection = bind.connect()
    try:
        # NOTE(review): extension name is interpolated into raw SQL; only
        # trusted, hard-coded extension names must be passed here.
        connection.execute('create extension %s' % extension)
    finally:
        connection.close()
def __init__(
    self,
    engine: Engine,
    table: Table,
) -> None:
    """Open a connection on *engine* and remember the target *table*."""
    self.connection = engine.connect()
    self.table = table
def add_lease(
    args,
    context: Context,
    engine: Engine,
) -> int:
    """Insert an auth DHCP lease built from *args*, or update the existing
    lease for the same IP. Returns ``os.EX_OK``.
    """
    connection = engine.connect()
    values = obtain_lease_info(LeaseArguments.from_anonymous_args(args),
                               context, missing_as_none=True)
    # Missing attributes fall back to the column DEFAULTs.
    values = {
        k: (v if v is not None else text('DEFAULT'))
        for k, v in values.items()
    }
    ip, mac = values["IPAddress"], values["MAC"]
    logger.debug(
        "Inserting new lease for IP %s and MAC %s",
        ip, mac,
    )
    # Run lookup + insert/update atomically inside one transaction.
    with connection.begin():
        # TODO: Use INSERT ON CONFLICT UPDATE on newer SQLAlchemy (>= 1.1)
        old_values = query_lease_for_update(connection, ip)
        if old_values is None:
            connection.execute(auth_dhcp_lease.insert(values=values))
        else:
            logger.warning("Lease for IP %s and MAC %s already exists", ip,
                           mac)
            perform_lease_update(connection, ip, mac, old_values, values)
    return os.EX_OK
def _get_view_names(engine: Engine, schema: str) -> list:
    """Return a list of view names, upper cased and prefixed by schema if
    needed."""
    prefix = f"{schema}." if schema else ""
    with engine.connect() as conn:
        names = engine.dialect.get_view_names(conn, schema)
        return [prefix + name.upper() for name in names]
def create_database(bind: Engine, database: str) -> None:
    """Setup a database (schema) in postgresql.

    Fix: the connection was leaked when ``execute`` raised.
    """
    logger.info('[plenario] Create database %s' % database)
    connection = bind.connect()
    try:
        # End the implicit transaction: CREATE DATABASE cannot run inside one.
        connection.execute('commit')
        connection.execute('create database %s' % database)
    finally:
        connection.close()
def coverage_get_successful(self, engine: Engine, spider: str, table: str) -> int:
    """Returns the total entries that got processed successfully"""
    query = (
        f"SELECT count({self.col_name}) FROM {table} WHERE "
        f"{self.get_database_selection_string(spider)} AND {self.col_name} <> 'null'"
    )
    # Fix: pass the engine itself -- pandas then manages the connection
    # lifecycle, whereas `engine.connect()` here leaked an open connection.
    return pd.read_sql(query, engine)["count"][0]
def drop_database(bind: Engine, database: str) -> None:
    """Drop a database (schema) in postgresql.

    Fix: the connection was leaked when ``execute`` raised.
    """
    logger.info('[plenario] Drop database %s' % database)
    connection = bind.connect()
    try:
        # End the implicit transaction: DROP DATABASE cannot run inside one.
        connection.execute('commit')
        connection.execute('drop database %s' % database)
    finally:
        connection.close()
def save_data_to_database(self, df: pd.DataFrame, engine: Engine):
    """Replace 'lower_court' rows for the decisions in *df*.

    Deletes the existing rows, then resolves court/canton/chamber string
    identifiers to ids and reinserts one row per decision.
    """
    with engine.connect() as conn:
        t = Table('lower_court', MetaData(), autoload_with=conn)
        # Delete and reinsert as no upsert command is available
        stmt = t.delete().where(delete_stmt_decisions_with_df(df))
        conn.execute(stmt)
        for _, row in df.iterrows():
            # Skip decisions without lower-court information.
            if not 'lower_court' in row or row['lower_court'] is None:
                continue
            lower_court = row["lower_court"]
            res = {}
            # NOTE(review): the filter strings below interpolate values into
            # SQL; confirm the inputs are trusted (no parameter binding here).
            if 'court' in lower_court and lower_court['court'] is not None:
                res['court_id'] = list(
                    self.select(
                        engine, 'court', 'court_id',
                        f"court_string = '{lower_court['court']}'")
                )[0]['court_id'][0]
                res['court_id'] = int(
                    res['court_id']
                ) if res['court_id'] is not None else None
            if 'canton' in lower_court and lower_court[
                    'canton'] is not None:
                res['canton_id'] = list(
                    self.select(engine, 'canton', 'canton_id',
                                f"short_code = '{lower_court['canton']}'")
                )[0]['canton_id'][0]
                res['canton_id'] = int(
                    res['canton_id']
                ) if res['canton_id'] is not None else None
            if 'chamber' in lower_court and lower_court[
                    'chamber'] is not None:
                res['chamber_id'] = list(
                    self.select(
                        engine, 'chamber', 'chamber_id',
                        f"chamber_string = '{lower_court['chamber']}'")
                )[0]['chamber_id'][0]
                res['chamber_id'] = int(
                    res['chamber_id']
                ) if res['chamber_id'] is not None else None
            stmt = t.insert().values([{
                "decision_id": str(row['decision_id']),
                "court_id": res.get('court_id'),
                "canton_id": res.get('canton_id'),
                "chamber_id": res.get('chamber_id'),
                "date": lower_court.get('date'),
                "file_number": lower_court.get('file_number')
            }])
            conn.execute(stmt)
class TwistedEngine(object):
    """Engine facade that runs blocking SQLAlchemy calls on worker objects.

    Engine-level operations share one dedicated worker; each connection
    created via :meth:`connect` gets its own.
    """

    def __init__(self, pool, dialect, url, reactor=None,
                 create_worker=_threaded_worker, **kwargs):
        if reactor is None:
            raise TypeError("Must provide a reactor")
        self._engine = Engine(pool, dialect, url, **kwargs)
        self._reactor = reactor
        self._create_worker = create_worker
        # Dedicated worker for engine-level operations.
        self._engine_worker = self._create_worker()

    def _defer_to_engine(self, f, *a, **k):
        # Schedule f on the engine worker; results come back to the reactor
        # thread via callFromThread.
        return _defer_to_worker(self._reactor.callFromThread,
                                self._engine_worker, f, *a, **k)

    @property
    def dialect(self):
        return self._engine.dialect

    @property
    def _has_events(self):
        return self._engine._has_events

    @property
    def _execution_options(self):
        return self._engine._execution_options

    def _should_log_info(self):
        return self._engine._should_log_info()

    def execute(self, *args, **kwargs):
        # Wrap the eventual ResultProxy so its blocking methods also defer.
        return (self._defer_to_engine(self._engine.execute, *args,
                                      **kwargs).addCallback(
            TwistedResultProxy, self._defer_to_engine))

    def has_table(self, table_name, schema=None):
        return self._defer_to_engine(self._engine.has_table, table_name,
                                     schema)

    def table_names(self, schema=None, connection=None):
        if connection is not None:
            # Unwrap to the underlying SQLAlchemy connection.
            connection = connection._connection
        return self._defer_to_engine(self._engine.table_names, schema,
                                     connection)

    def connect(self):
        # Each connection gets its own worker so connections don't block
        # each other or engine-level calls.
        worker = self._create_worker()
        return (_defer_to_worker(self._reactor.callFromThread, worker,
                                 self._engine.connect).addCallback(
            TwistedConnection, self, worker))
def is_engine_available(engine: Engine) -> bool:
    """Check that a connection to the SQLAlchemy engine can be opened.

    Fix: the original called ``conn.detach()``, which removes the DBAPI
    connection from the pool without ever closing it -- a real connection
    leak per probe. ``close()`` returns it to the pool instead.
    """
    try:
        conn = engine.connect()
        conn.close()
    except SQLAlchemyError:
        return False
    return True
def print_leases(
    args,
    context: Context,
    engine: Engine,
) -> int:
    """Print all leases in dnsmasq leasefile format

    Returns ``os.EX_OK``. Fix: the connection was never released.
    """
    connection = engine.connect()
    try:
        with connection.begin():
            leases = get_all_auth_dhcp_leases(connection)
            context.stdout.writelines(generate_leasefile_lines(leases))
    finally:
        connection.close()
    return os.EX_OK
def _drop_database(engine: Engine, dbname: str) -> None:
    """
    Drop a test database.

    Fix: the connection was leaked when DROP DATABASE failed.

    :param engine: Engine returned by test_engine()
    :param dbname: Database name to drop
    """
    aurweb.schema.metadata.drop_all(bind=engine)
    conn = engine.connect()
    try:
        conn.execute(f"DROP DATABASE {dbname}")
    finally:
        conn.close()
def save_data_to_database(self, df: pd.DataFrame, engine: Engine):
    """Replace 'citation' rows for the decisions in *df*.

    Deletes existing rows first, then reinserts one row per citation,
    as no upsert command is available.
    """
    # Reflect the table once instead of once per dataframe row.
    t = Table('citation', MetaData(), autoload_with=engine)
    for idx, row in df.iterrows():
        with engine.connect() as conn:
            # Delete and reinsert as no upsert command is available
            stmt = t.delete().where(delete_stmt_decisions_with_df(df))
            # Fix: execute on the connection opened above -- the original
            # called engine.execute(), bypassing `conn` and acquiring extra
            # implicit connections from the pool.
            conn.execute(stmt)
            for k in row['citations'].keys():
                citation_type_id = CitationType(k).value
                for citation in row['citations'][k]:
                    stmt = t.insert().values([{
                        "decision_id": str(row['decision_id']),
                        "citation_type_id": citation_type_id,
                        "url": citation.get("url"),
                        "text": citation["text"]
                    }])
                    conn.execute(stmt)
def _compile_print_and_run_sql_query(
    sql_schema_info: SQLAlchemySchemaInfo,
    graphql_query: str,
    parameters: Dict[str, Any],
    engine: Engine,
) -> List[Dict[str, Any]]:
    """Compile, print, bind the arguments, then execute the query."""
    compilation = compile_graphql_to_sql(sql_schema_info, graphql_query)
    query_text = print_sqlalchemy_query_string(compilation.query,
                                               sql_schema_info.dialect)
    bound_query = bind_parameters_to_query_string(
        query_text, compilation.input_metadata, parameters)
    return materialize_result_proxy(engine.execute(bound_query))
class TwistedEngine(object):
    """Engine facade that defers blocking SQLAlchemy calls to a thread pool."""

    def __init__(self, pool, dialect, url, reactor=None, thread_pool=None,
                 **kwargs):
        if reactor is None:
            raise TypeError("Must provide a reactor")
        self._engine = Engine(pool, dialect, url, **kwargs)
        self._reactor = reactor
        # Default to the reactor's own thread pool when none is given.
        if thread_pool is None:
            thread_pool = reactor.getThreadPool()
        self._tpool = thread_pool

    def _defer_to_thread(self, f, *args, **kwargs):
        # Run f on the pool; the returned Deferred fires on the reactor thread.
        return deferToThreadPool(
            self._reactor, self._tpool, f, *args, **kwargs
        )

    @property
    def dialect(self):
        return self._engine.dialect

    @property
    def _has_events(self):
        return self._engine._has_events

    @property
    def _execution_options(self):
        return self._engine._execution_options

    def _should_log_info(self):
        return self._engine._should_log_info()

    def connect(self):
        d = self._defer_to_thread(self._engine.connect)
        d.addCallback(TwistedConnection, self)
        return d

    def execute(self, *args, **kwargs):
        # Wrap the eventual ResultProxy so its blocking methods also defer.
        d = self._defer_to_thread(self._engine.execute, *args, **kwargs)
        d.addCallback(TwistedResultProxy, self)
        return d

    def has_table(self, table_name, schema=None):
        return self._defer_to_thread(
            self._engine.has_table, table_name, schema)

    def table_names(self, schema=None, connection=None):
        if connection is not None:
            # Unwrap to the underlying SQLAlchemy connection.
            connection = connection._connection
        return self._defer_to_thread(
            self._engine.table_names, schema, connection)
def compile_and_run_sql_query(
    sql_schema_info: SQLAlchemySchemaInfo,
    graphql_query: str,
    parameters: Dict[str, Any],
    engine: Engine,
) -> List[Dict[str, Any]]:
    """Compile and run a SQL query against the supplied SQL backend."""
    compilation_result = graphql_to_sql(sql_schema_info, graphql_query,
                                        parameters)
    # Each result row is converted to a plain dict.
    return [dict(row) for row in engine.execute(compilation_result.query)]
def compile_and_run_sql_query(
    sql_schema_info: SQLAlchemySchemaInfo,
    graphql_query: str,
    parameters: Dict[str, Any],
    engine: Engine,
) -> Tuple[List[Dict[str, Any]], Dict[str, OutputMetadata]]:
    """Compile and run a SQL query against the SQL engine, return result and
    output metadata."""
    compilation_result = graphql_to_sql(sql_schema_info, graphql_query,
                                        parameters)
    rows = [dict(row) for row in engine.execute(compilation_result.query)]
    # Output metadata is needed for MSSQL fold postprocessing.
    return rows, compilation_result.output_metadata
def update_job_listing(engine: Engine) -> Optional[int]:
    """
    Update row in database with problematic characters escaped.

    Fix: the return annotation and docstring claimed Optional[List[dict]],
    but the function returns ``result.rowcount`` (a row count).

    :param engine: Engine object representing a SQL database.
    :type engine: engine
    :return: Optional[int] number of rows updated
    """
    result = engine.execute(
        text("UPDATE nyc_jobs SET business_title = 'Senior QA Scapegoat 🏆', \
        job_category = 'Information? <>!#%%Technology!%%#^&%* & Telecom' \
        WHERE job_id = 229837;"))
    LOGGER.info(f"Selected {result.rowcount} row: \
        {result}")
    return result.rowcount
def fetch_job_listings(engine: Engine) -> Optional[List[dict]]:
    """
    Select rows from database and parse as list of dicts.

    :param engine: Database engine to handle raw SQL queries.
    :type engine: engine
    :return: Optional[List[dict]]
    """
    result = engine.execute(
        text("SELECT job_id, agency, business_title, \
        salary_range_from, salary_range_to \
        FROM nyc_jobs ORDER BY RAND() LIMIT 10;"))
    listings = [dict(record) for record in result.fetchall()]
    LOGGER.info(f"Selected {result.rowcount} rows: {listings}")
    return listings
def __init__(
    self,
    db_engine: Engine,
    db_table: str,
    db_col: str,
    order_col: str = "id",
):
    """Load every value of *db_col* from *db_table*, ordered by *order_col*."""
    self.db_engine = db_engine
    self.db_table = db_table
    self.db_col = db_col
    rows = db_engine.execute(
        f"SELECT {db_col} FROM {db_table} ORDER BY {order_col}"
    )
    # each row is a tuple; keep only its first (and only) column
    self.tweets = [row[0] for row in rows]
    self.size = len(self.tweets)
def dump_orm_object_as_insert_sql(engine: Engine, obj: object, fileobj: TextIO) -> None: """ Takes a SQLAlchemy ORM object, and writes ``INSERT`` SQL to replicate it to the output file-like object. Args: engine: SQLAlchemy :class:`Engine` obj: SQLAlchemy ORM object to write fileobj: file-like object to write to """ # literal_query = make_literal_query_fn(engine.dialect) insp = inspect(obj) # insp: an InstanceState # http://docs.sqlalchemy.org/en/latest/orm/internals.html#sqlalchemy.orm.state.InstanceState # noqa # insp.mapper: a Mapper # http://docs.sqlalchemy.org/en/latest/orm/mapping_api.html#sqlalchemy.orm.mapper.Mapper # noqa # Don't do this: # table = insp.mapper.mapped_table # Do this instead. The method above gives you fancy data types like list # and Arrow on the Python side. We want the bog-standard datatypes drawn # from the database itself. meta = MetaData(bind=engine) table_name = insp.mapper.mapped_table.name # log.debug("table_name: {}", table_name) table = Table(table_name, meta, autoload=True) # log.debug("table: {}", table) # NewRecord = quick_mapper(table) # columns = table.columns.keys() query = select(table.columns) # log.debug("query: {}", query) for orm_pkcol in insp.mapper.primary_key: core_pkcol = table.columns.get(orm_pkcol.name) pkval = getattr(obj, orm_pkcol.name) query = query.where(core_pkcol == pkval) # log.debug("query: {}", query) cursor = engine.execute(query) row = cursor.fetchone() # should only be one... row_dict = dict(row) # log.debug("obj: {}", obj) # log.debug("row_dict: {}", row_dict) statement = table.insert(values=row_dict) # insert_str = literal_query(statement) insert_str = get_literal_query(statement, bind=engine) writeline_nl(fileobj, insert_str)
def fast_append_to_sql(
    df: pd.DataFrame,
    engine: Engine,
    table_type: Union[Type[TemptableOfficialNoLocation],
                      Type[TemptableOfficialHasLocation]],
):
    """Bulk-append *df* into the temp table mapped by *table_type*.

    Uses PostgreSQL ``COPY FROM`` for speed, falls back to pandas
    ``to_sql`` on SQLite, and raises NotImplementedError for other
    dialects. Raises ValueError when *df* lacks required columns.
    """
    table = table_type.__table__
    # "id" is excluded from the copied columns (not written by us).
    cols = [x.name for x in table.columns if x.name != "id"]
    colnames = [f'"{x}"' for x in cols]
    temp_df = df.reset_index()
    # make sure we have the columns
    have_cols = set(list(temp_df))
    missing_cols = set(cols) - have_cols
    if len(missing_cols) > 0:
        msg = "Missing columns {}".format(", ".join(list(missing_cols)))
        raise ValueError(msg)
    with closing(engine.connect()) as con:
        if engine.dialect.name == "postgresql":
            dest = ("{}.{}".format(table.schema, table.name)
                    if table.schema is not None else table.name)
            # Stream the frame through an in-memory TSV into COPY FROM on
            # the raw DBAPI cursor, then commit explicitly.
            with io.StringIO() as csv:
                temp_df.to_csv(csv, sep="\t", columns=cols, index=False,
                               header=False)
                csv.seek(0)
                with closing(con.connection.cursor()) as cur:
                    cur.copy_from(csv, dest, columns=colnames, null="")
                    cur.connection.commit()
        elif engine.dialect.name == "sqlite":
            # pandas is ok for sqlite
            temp_df[cols].to_sql(table.name, engine, if_exists="append",
                                 index=False, chunksize=500_000)
        else:
            raise NotImplementedError(
                "Only implemented for sqlite and postgres")
def dump_orm_object_as_insert_sql(engine: Engine, obj: object,
                                  fileobj: TextIO) -> None:
    """Write ``INSERT`` SQL replicating the ORM object *obj* to *fileobj*."""
    # literal_query = make_literal_query_fn(engine.dialect)
    insp = inspect(obj)
    # insp: an InstanceState
    # http://docs.sqlalchemy.org/en/latest/orm/internals.html#sqlalchemy.orm.state.InstanceState  # noqa
    # insp.mapper: a Mapper
    # http://docs.sqlalchemy.org/en/latest/orm/mapping_api.html#sqlalchemy.orm.mapper.Mapper  # noqa

    # Don't do this:
    # table = insp.mapper.mapped_table
    # Do this instead. The method above gives you fancy data types like list
    # and Arrow on the Python side. We want the bog-standard datatypes drawn
    # from the database itself.
    meta = MetaData(bind=engine)
    table_name = insp.mapper.mapped_table.name
    # log.debug("table_name: {}".format(table_name))
    table = Table(table_name, meta, autoload=True)
    # log.debug("table: {}".format(table))

    # NewRecord = quick_mapper(table)
    # columns = table.columns.keys()
    query = sql.select(table.columns)
    # log.debug("query: {}".format(query))
    # Restrict to this object's row via its (possibly composite) primary key.
    for orm_pkcol in insp.mapper.primary_key:
        core_pkcol = table.columns.get(orm_pkcol.name)
        pkval = getattr(obj, orm_pkcol.name)
        query = query.where(core_pkcol == pkval)
    # log.debug("query: {}".format(query))
    cursor = engine.execute(query)
    row = cursor.fetchone()  # should only be one...
    row_dict = dict(row)
    # log.debug("obj: {}".format(obj))
    # log.debug("row_dict: {}".format(row_dict))
    statement = table.insert(values=row_dict)
    # insert_str = literal_query(statement)
    insert_str = get_literal_query(statement, bind=engine)
    writeline_nl(fileobj, insert_str)
class TwistedEngine(object):
    """Engine facade that defers blocking SQLAlchemy calls to the reactor's
    thread pool."""

    def __init__(self, pool, dialect, url, reactor=None, **kwargs):
        if reactor is None:
            raise TypeError("Must provide a reactor")
        super(TwistedEngine, self).__init__()
        self._engine = Engine(pool, dialect, url, **kwargs)
        self._reactor = reactor

    def _defer_to_thread(self, f, *args, **kwargs):
        # Fetch the reactor's pool per call (it may not exist at init time).
        tpool = self._reactor.getThreadPool()
        return deferToThreadPool(self._reactor, tpool, f, *args, **kwargs)

    @property
    def dialect(self):
        return self._engine.dialect

    @property
    def _has_events(self):
        return self._engine._has_events

    @property
    def _execution_options(self):
        return self._engine._execution_options

    def _should_log_info(self):
        return self._engine._should_log_info()

    def connect(self):
        d = self._defer_to_thread(self._engine.connect)
        d.addCallback(TwistedConnection, self)
        return d

    def execute(self, *args, **kwargs):
        # Wrap the eventual ResultProxy so its blocking methods also defer.
        d = self._defer_to_thread(self._engine.execute, *args, **kwargs)
        d.addCallback(TwistedResultProxy, self)
        return d
def dump_table_as_insert_sql(engine: Engine, table_name: str, fileobj: TextIO,
                             wheredict: Dict[str, Any] = None,
                             include_ddl: bool = False,
                             multirow: bool = False) -> None:
    """Write ``INSERT`` statements reproducing *table_name*'s rows to
    *fileobj*.

    Rows may be filtered by column equality via *wheredict*; *include_ddl*
    prepends the table's DDL; *multirow* would batch rows into a single
    multi-value INSERT but is currently force-disabled (see below).
    """
    # http://stackoverflow.com/questions/5631078/sqlalchemy-print-the-actual-query  # noqa
    # http://docs.sqlalchemy.org/en/latest/faq/sqlexpressions.html
    # http://www.tylerlesmann.com/2009/apr/27/copying-databases-across-platforms-sqlalchemy/  # noqa
    # https://github.com/plq/scripts/blob/master/pg_dump.py
    log.info("dump_data_as_insert_sql: table_name={}".format(table_name))
    writelines_nl(fileobj, [
        sql_comment("Data for table: {}".format(table_name)),
        sql_comment("Filters: {}".format(wheredict)),
    ])
    dialect = engine.dialect
    if not dialect.supports_multivalues_insert:
        multirow = False
    if multirow:
        # Multirow literal binding is broken, so it is always turned off.
        log.warning("dump_data_as_insert_sql: multirow parameter substitution "
                    "not working yet")
        multirow = False

    # literal_query = make_literal_query_fn(dialect)

    meta = MetaData(bind=engine)
    log.debug("... retrieving schema")
    table = Table(table_name, meta, autoload=True)
    if include_ddl:
        log.debug("... producing DDL")
        dump_ddl(table.metadata, dialect_name=engine.dialect.name,
                 fileobj=fileobj)
    # NewRecord = quick_mapper(table)
    # columns = table.columns.keys()
    log.debug("... fetching records")
    # log.debug("meta: {}".format(meta))  # obscures password
    # log.debug("table: {}".format(table))
    # log.debug("table.columns: {}".format(repr(table.columns)))
    # log.debug("multirow: {}".format(multirow))
    query = sql.select(table.columns)
    if wheredict:
        for k, v in wheredict.items():
            col = table.columns.get(k)
            query = query.where(col == v)
    # log.debug("query: {}".format(query))
    cursor = engine.execute(query)
    if multirow:
        row_dict_list = []
        for r in cursor:
            row_dict_list.append(dict(r))
        # log.debug("row_dict_list: {}".format(row_dict_list))
        statement = table.insert().values(row_dict_list)
        # log.debug("statement: {}".format(repr(statement)))
        # insert_str = literal_query(statement)
        insert_str = get_literal_query(statement, bind=engine)
        # NOT WORKING FOR MULTIROW INSERTS. ONLY SUBSTITUTES FIRST ROW.
        writeline_nl(fileobj, insert_str)
    else:
        for r in cursor:
            row_dict = dict(r)
            statement = table.insert(values=row_dict)
            # insert_str = literal_query(statement)
            insert_str = get_literal_query(statement, bind=engine)
            # log.debug("row_dict: {}".format(row_dict))
            # log.debug("insert_str: {}".format(insert_str))
            writeline_nl(fileobj, insert_str)
    log.debug("... done")