def create_database(cursor: psycopg2.extensions.cursor, overwrite: bool):
    """Initializes the Postgres database.

    Installs the ``rdkit`` and ``tsm_system_rows`` extensions, makes sure the
    RDKit schema exists, and creates every table described in
    ``ord_interface.client.TABLES``.

    Args:
        cursor: psycopg2 cursor.
        overwrite: If True, the tables listed in ``ord_interface.client.TABLES``
            are dropped before being recreated.
    """
    if overwrite:
        logger.info("Removing existing tables")
        drop_table = sql.SQL("DROP TABLE IF EXISTS {}")
        for table in ord_interface.client.TABLES:
            cursor.execute(drop_table.format(sql.Identifier(table)))
    cursor.execute(sql.SQL("CREATE EXTENSION IF NOT EXISTS rdkit"))
    cursor.execute(sql.SQL("CREATE EXTENSION IF NOT EXISTS tsm_system_rows"))
    # IF NOT EXISTS (like the extensions above) so that re-initializing a
    # database that already has the schema, e.g. with overwrite=True, does not
    # fail here.
    cursor.execute(
        sql.SQL("CREATE SCHEMA IF NOT EXISTS {}").format(sql.Identifier(ord_interface.client.RDKIT_SCHEMA))
    )
    for table, columns in ord_interface.client.TABLES.items():
        dtypes = []
        for column, dtype in columns.items():
            if table == "reactions" and column == "reaction_id":
                component = sql.SQL("{} {} PRIMARY KEY")
            else:
                component = sql.SQL("{} {}")
            # NOTE(kearnes): sql.Identifier(dtype) does not work for the
            # 'double precision' type.
            dtypes.append(component.format(sql.Identifier(column), sql.SQL(dtype)))
        command = sql.Composed(
            [
                sql.SQL("CREATE TABLE {} (").format(sql.Identifier(table)),
                sql.Composed(dtypes).join(", "),
                sql.SQL(")"),
            ]
        )
        logger.info("Running:\n%s", command.as_string(cursor))
        cursor.execute(command)
def create_table(connection: psycopg2.extensions.connection, cursor: psycopg2.extensions.cursor):
    """Create the ``templates`` table unless it already exists, then commit.

    Args:
        connection (psycopg2.extensions.connection): database connection,
            used to commit the statement
        cursor (psycopg2.extensions.cursor): database cursor, used to run it
    """
    # DDL for the templates table; IF NOT EXISTS makes repeated calls harmless.
    ddl = '''CREATE TABLE IF NOT EXISTS templates
          (id SERIAL PRIMARY KEY NOT NULL,
          name TEXT NOT NULL,
          year INT NOT NULL,
          month INT NOT NULL,
          category TEXT NOT NULL,
          uw_category TEXT,
          wikibreak_category1 TEXT,
          wikibreak_category2 TEXT,
          wikibreak_subcategory TEXT,
          amount INT,
          cumulative_amount INT); '''

    cursor.execute(ddl)
    connection.commit()
    print("Table created successfully")
def _rdkit_reaction_smiles(cursor: psycopg2.extensions.cursor, table: str):
    """Builds the RDKit cartridge table holding parsed reaction SMILES.

    Selects ``reaction_id`` and ``r`` into a new table named ``<table>`` inside
    the ``ord_interface.client.RDKIT_SCHEMA`` schema, where ``r`` is the result
    of ``reaction_from_smiles`` applied to the source table's
    ``reaction_smiles`` column. Rows for which ``r`` is NULL are left out. A
    gist index named ``<table>_r`` is then created on ``r``.

    Args:
        cursor: psycopg2 cursor.
        table: Table name.
    """
    source_table = sql.Identifier(table)
    rdkit_table = sql.Identifier(ord_interface.client.RDKIT_SCHEMA, table)
    index_name = sql.Identifier(f"{table}_r")

    populate = sql.SQL(
        """
        SELECT reaction_id, r
        INTO {}
        FROM (
            SELECT reaction_id,
                   reaction_from_smiles(reaction_smiles::cstring) AS r
            FROM {}) tmp
        WHERE r IS NOT NULL"""
    ).format(rdkit_table, source_table)
    cursor.execute(populate)

    build_index = sql.SQL("CREATE INDEX {} ON {} USING gist(r)").format(index_name, rdkit_table)
    cursor.execute(build_index)
def _run(
    self,
    predicates: List["ReactionComponentPredicate"],
    cursor: psycopg2.extensions.cursor,
    limit: Optional[int] = None,
) -> List[Result]:
    """Runs the query for a set of predicates.

    One SELECT is built per predicate and the SELECTs are combined with
    INTERSECT, so a reaction is returned only if every predicate selects it.

    Args:
        predicates: Predicates to combine. An empty list short-circuits to an
            empty result without touching the database.
        cursor: psycopg2 cursor.
        limit: Maximum number of matches; a falsy value adds no LIMIT clause.

    Returns:
        Whatever ``fetch_results`` produces for the executed query.
    """
    if not predicates:
        return []
    self._setup(predicates, cursor)
    selects = []
    args = []
    for predicate in predicates:
        pieces = [
            sql.SQL(
                """
                SELECT DISTINCT dataset_id, reaction_id, serialized
                FROM reactions """
            )
        ]
        pieces.extend(self._get_tables())
        pieces.append(
            sql.SQL(
                """
                WHERE """
            )
        )
        predicate_sql, predicate_args = predicate.get()
        pieces.append(predicate_sql)
        args.extend(predicate_args)
        selects.append(sql.Composed(pieces))
    query_parts = [sql.Composed(selects).join(" INTERSECT ")]
    if limit:
        query_parts.append(sql.SQL(" LIMIT %s"))
        args.append(limit)
    query = sql.Composed(query_parts).join("")
    logger.info("Running SQL command:%s", cursor.mogrify(query, args).decode())
    cursor.execute(query, args)
    return fetch_results(cursor)
def _get_business_id(cur: psycopg2.extensions.cursor, business_identifier: str) -> str: """Return the business id for the given identifier.""" cur.execute(f"select id from businesses where identifier='{business_identifier}'") business_id = cur.fetchone() if not business_id: return '' return str(business_id[0])
def ExportSession(self, cursor: psycopg2.extensions.cursor):
    """Inserts the session row once and remembers its id.

    Does nothing when ``self.session_id`` is already set. Otherwise runs
    SESSION_INSERT with the session time, track and live-data flag, stores the
    first column of the returned row as ``self.session_id`` and commits on
    ``self.timescale_conn``.
    """
    if self.session_id:
        return
    session_values = (self.session_time.ToJsonString(), self.track, self.live_data)
    cursor.execute(SESSION_INSERT, session_values)
    inserted_row = cursor.fetchone()
    self.session_id = inserted_row[0]
    self.timescale_conn.commit()
def stocks_bulk_insert(df: pd.DataFrame, conn: psycopg2.extensions.connection,
                       cursor: psycopg2.extensions.cursor) -> Any:
    """
    Inserts all rows of df in the DB, then fetches historical data for them.

    :param df: pd.DataFrame. stocks data with "ticker", "stock_name" and
        "exchange" columns.
    :param conn: psycopg2.extensions.connection. Connection to DB.
    :param cursor: cursor of DB.
    :return: None.
    """
    if df.empty:
        return

    # Values are bound as parameters rather than formatted into the statement,
    # so names containing quotes (e.g. "Moody's") no longer break the INSERT.
    query = "INSERT INTO stock (ticker, stock_name, exchange) " \
            "VALUES (%s, %s, %s) ON CONFLICT DO NOTHING"
    # str() keeps the previous behaviour of storing every value as text.
    rows = [
        (str(series["ticker"]), str(series["stock_name"]), str(series["exchange"]))
        for _, series in df.iterrows()
    ]
    cursor.executemany(query, rows)
    conn.commit()

    print(">>> GETTING HISTORICAL DATA")
    ticker_list = list(df["ticker"])
    __get_historical_data(ticker_list)
def check_and_mark_data_quality(cursor: psycopg2.extensions.cursor, log: logger.Log) -> None:
    """
    Using `cursor`, mark rows in the stage.game_data table that satisfy data
    quality rules. Log to `log` a warning if any rows fail data quality.

    A game passes only if none of its rows has a non-digit character in
    move_number or "column", a "column" value above 4, or a result outside
    ('', 'win', 'draw'), and the game has exactly two distinct players. After
    the update, rows whose passed_data_quality_check is false are counted and
    a warning is written when that count is positive.
    """
    # Raw string: the regex '\D' must reach Postgres verbatim, and a plain
    # string literal would treat it as an (invalid) Python escape sequence.
    quality_check_sql = r'''
        UPDATE stage.game_data
        SET passed_data_quality_check = True
        WHERE game_id NOT IN (
            SELECT distinct(game_id)
            FROM stage.game_data
            WHERE move_number ~ '\D'                    -- Must be an integer
               OR "column" ~ '\D'                       -- Must be an integer
               OR "column"::int > 4                     -- The grid is 4x4
               OR result NOT IN ('', 'win', 'draw')     -- Valid values for result
        )
        AND game_id NOT IN (
            SELECT game_id
            FROM stage.game_data
            GROUP BY game_id
            HAVING COUNT(DISTINCT(player_id)) <> 2      -- A game should have 2 players
        );
    '''
    cursor.execute(quality_check_sql)

    cursor.execute("SELECT COUNT(*) FROM stage.game_data WHERE "
                   "passed_data_quality_check = false;")
    number_bad = cursor.fetchone()[0]
    if number_bad > 0:
        log.write_warning(
            f'Rejected {number_bad} game records due to data quality.')
def run(self, cursor: psycopg2.extensions.cursor, limit: Optional[int] = None) -> List[Result]:
    """Runs the query.

    Selects the distinct (dataset_id, reaction_id, serialized) rows of
    ``reactions`` whose ``doi`` matches any entry of ``self._dois``.

    Args:
        cursor: psycopg2 cursor.
        limit: Integer maximum number of matches. If None (the default), no
            limit is set.

    Returns:
        List of Result instances.
    """
    select_clause = sql.SQL(
        """
        SELECT DISTINCT dataset_id, reaction_id, serialized
        FROM reactions
        WHERE doi = ANY (%s)"""
    )
    clauses = [select_clause]
    args = [self._dois]
    if limit:
        clauses.append(sql.SQL(" LIMIT %s"))
        args.append(limit)
    query = sql.Composed(clauses).join("")
    logger.info("Running SQL command:%s", cursor.mogrify(query, args).decode())
    cursor.execute(query, args)
    return fetch_results(cursor)
def UpdateLapDuration(self,
                      lap_number: int,
                      duration: gps_pb2.Lap.duration,
                      cursor: psycopg2.extensions.cursor):
    """Exports lap duration to the Timescale backend.

    Runs LAP_DURATION_UPDATE with the duration in milliseconds and the id
    stored for ``lap_number`` in ``self.lap_number_ids``, then commits on
    ``self.timescale_conn``.
    """
    duration_ms = duration.ToMilliseconds()
    lap_id = self.lap_number_ids[lap_number]
    cursor.execute(LAP_DURATION_UPDATE, (duration_ms, lap_id))
    self.timescale_conn.commit()
def cb(cursor: psycopg2.extensions.cursor) -> None:
    """
    Execute the query ``q`` with values ``v`` (both taken from the enclosing
    scope) on the given cursor.

    :param cursor: Cursor on which to run the statement.
    :type cursor: psycopg2.extensions.cursor
    """
    # TODO: Could be smart here and only update if content-type/byte
    #       data differs while keeping a row-lock between queries.
    cursor.execute(q, v)
def ExportPoint(self,
                point: gps_pb2.Point,
                lap_number: int,
                cursor: psycopg2.extensions.cursor):
    """Exports point data to timescale.

    When no (truthy) lap id is known yet for ``lap_number``, the point is
    queued on ``self.retry_point_queue`` instead of being inserted.
    """
    lap_id = self.lap_number_ids.get(lap_number)
    if not lap_id:
        self.retry_point_queue.append((point, lap_number))
        return

    geo_hash = geohash.encode(point.lat, point.lon)
    elapsed_duration_ms = self.GetElapsedTime(point, lap_id)
    speed_mph = point.speed * 2.23694  # m/s to mph
    row = (
        point.time.ToJsonString(),
        self.session_id,
        lap_id,
        point.lat,
        point.lon,
        point.alt,
        speed_mph,
        geo_hash,
        elapsed_duration_ms,
        point.tps_voltage,
        point.water_temp_voltage,
        point.oil_pressure_voltage,
        point.rpm,
        point.afr,
        point.fuel_level_voltage,
        point.accelerometer_x,
        point.accelerometer_y,
        point.accelerometer_z,
        point.pitch,
        point.roll,
    )
    cursor.execute(POINT_INSERT, row)
def process_song_file(cur: psycopg2.extensions.cursor, filepath: str):
    """Load one song file and insert its song and artist records.

    The file is read as line-delimited JSON; only its first row is used.

    Args:
        cur (psycopg2.extensions.cursor): postgres cursor
        filepath (str): song filepath
    """
    song_columns = ["song_id", "title", "artist_id", "year", "duration"]
    artist_columns = [
        "artist_id",
        "artist_name",
        "artist_location",
        "artist_latitude",
        "artist_longitude",
    ]

    # open song file
    frame = pd.read_json(filepath, lines=True)

    # insert song record
    first_song = frame[song_columns].values[0]
    cur.execute(song_table_insert, list(first_song))

    # insert artist record
    first_artist = frame[artist_columns].values[0]
    cur.execute(artist_table_insert, list(first_artist))
def eh(c: psycopg2.extensions.cursor) -> None:
    """Run the enclosing scope's query and require exactly one affected row.

    Raises:
        KeyError: the statement reported a row count of 0.
        RuntimeError: the statement reported any row count other than 0 or 1.
    """
    c.execute(q, v)
    affected = c.rowcount
    if affected == 1:
        return
    if affected == 0:
        raise KeyError(uuid)
    raise RuntimeError("Found more than one entry for the given "
                       "uuid '%s' (got: %d)" % (uuid, affected))
def upload_data(cursor: psycopg2.extensions.cursor, csv_path: str) -> None:
    '''
    Execute SQL to upload roads.csv to postgres.

    Drops and recreates the ``roads`` table, loads it with a server-side COPY
    from ``csv_path`` and then sets the geometry SRID to 2227.

    Args:
        cursor: psycopg2 cursor used to run the statements.
        csv_path: path of the CSV file handed to COPY ... FROM.
    '''
    # The path is bound as a query parameter (psycopg2 quotes it into a string
    # literal) instead of being formatted into the SQL, so quotes in the path
    # cannot break or alter the statement.
    query = """
        DROP TABLE IF EXISTS roads;
        CREATE TABLE public.roads (
            roadid integer PRIMARY KEY,
            geom public.geometry(MultiLineString,0),
            name character varying(125),
            street_classification character varying(125),
            relevant_road BOOL,
            ksi bigint,
            injured bigint,
            crashes bigint,
            ksi_mile float8,
            injured_mile float8,
            crashes_mile float8
        );
        COPY roads(roadid, geom, name, street_classification, relevant_road,
                   ksi, injured, crashes, ksi_mile, injured_mile, crashes_mile)
        FROM %s DELIMITER ',' CSV HEADER;
        SELECT UpdateGeometrySRID('public', 'roads', 'geom', 2227);
    """
    cursor.execute(query, (csv_path,))
def insert_entities(cursor: psycopg2.extensions.cursor):
    """Insert every item of the module-level ``entities`` into the entities table.

    Each item's ``category`` value is resolved to ``category_id`` by a
    sub-select on ``categories.route``. Keys missing from an item are inserted
    as None.
    """
    # The statement is identical for every item, so it is built once.
    insert_statement = (
        "INSERT INTO entities ("
        "creation_date, title, preview, text, rating, voters_count, category_id"
        ") "
        "VALUES ("
        "%s, %s, %s, %s, %s, %s,"
        "(SELECT id FROM categories WHERE route=%s)"
        ")"
    )
    value_keys = (
        'creation_date',
        'title',
        'preview',
        'text',
        'rating',
        'voters_count',
        'category',
    )
    for item in entities:
        values = tuple(item.get(key) for key in value_keys)
        cursor.execute(insert_statement, values)
def get_data_from_db(query: str, conn: psycopg2.extensions.connection,
                     cur: psycopg2.extensions.cursor, df: pd.DataFrame,
                     col_names: List[str]) -> pd.DataFrame:
    """Run ``query`` and append its rows to ``df``.

    Rows are fetched in batches of 100 and mapped onto ``col_names`` by
    position (the first ``len(col_names)`` values of each row are used).

    Args:
        query: SQL statement to execute.
        conn: connection, rolled back if anything goes wrong.
        cur: cursor used to execute the query and fetch rows.
        df: DataFrame the fetched rows are appended to.
        col_names: column name for each leading position of a fetched row.

    Returns:
        ``df`` itself when the query yields no rows, otherwise a new DataFrame
        holding the rows of ``df`` followed by the fetched rows with a fresh
        index. On any error the error and query are printed, the transaction is
        rolled back and None is returned.
    """
    try:
        cur.execute(query)
        fetched = []
        while True:
            # Fetch the next 100 rows
            batch = cur.fetchmany(100)
            # An empty batch means we've reached the end of the results
            if not batch:
                break
            # One dictionary per row, keyed by column name
            fetched.extend(
                {name: row[i] for i, name in enumerate(col_names)}
                for row in batch
            )
        if not fetched:
            return df
        # DataFrame.append was removed in pandas 2.0; a single concat is the
        # supported equivalent and avoids re-copying the frame per batch.
        return pd.concat([df, pd.DataFrame(fetched)], ignore_index=True)
    except Exception as error:
        print(f"{type(error).__name__}: {error}")
        print("Query:", cur.query)
        conn.rollback()
        return None
def select_DiscordUser_from_database(member_id: int, cursor: psycopg2.extensions.cursor):
    """Fetch the DiscordUser row for ``member_id``.

    Runs ``sql_select_DiscordUser`` with ``member_id`` as its only parameter.

    Returns:
        The first row returned by the query (None when there is none). If
        anything raises, the error is printed and None is returned.
    """
    try:
        cursor.execute(sql_select_DiscordUser, (member_id, ))
        row = cursor.fetchone()
    except (Exception, psycopg2.Error) as error:
        print(f"Error while selecting from DiscordUser on database: {error}")
        return None
    return row
def insert_DiscordUser_into_databse(
        member_id: int,
        cursor: psycopg2.extensions.cursor,
        connection: psycopg2.extensions.connection):
    """Insert a DiscordUser row for ``member_id`` and commit.

    Runs ``sql_insert_DiscordUser_into_database`` with ``member_id`` as its
    only parameter. Errors are reported on stdout rather than raised.

    Args:
        member_id: id bound to the insert statement.
        cursor: cursor used to execute the statement.
        connection: connection the insert is committed (or rolled back) on.
    """
    try:
        cursor.execute(sql_insert_DiscordUser_into_database, (member_id, ))
        connection.commit()
    except (Exception, psycopg2.Error) as error:
        print(f"Error while inserting into DiscordUser on database: {error}")
        # The error is swallowed, so the caller cannot know the transaction
        # failed; roll back here, otherwise every later statement on this
        # connection is rejected until someone does.
        try:
            connection.rollback()
        except (Exception, psycopg2.Error) as rollback_error:
            print(f"Error while rolling back DiscordUser insert: {rollback_error}")
def execute(c: psycopg2.extensions.cursor) -> None:
    """Run the enclosing scope's query and verify every requested UUID came back.

    Raises:
        KeyError: for the first entry of ``str_uuid_set`` that is not among
            the first-column values of the rows returned by the statement.
    """
    c.execute(q, v)
    # Check query UUIDs against rows that would actually be deleted.
    returned_uuids = {row[0] for row in c.fetchall()}
    for uid in str_uuid_set:
        if uid in returned_uuids:
            continue
        raise KeyError(uid)
def task_10_list_first_10_customers(cur: psycopg2.extensions.cursor) -> list:
    """
    Return up to 10 rows of the customers table.

    Results: 10 records
    """
    query = "SELECT * FROM customers LIMIT 10;"
    cur.execute(query)
    customers = cur.fetchall()
    return customers
def lookup_venue_id(cur: psycopg2.extensions.cursor, name):
    """Return the id of the single venue called ``name``.

    Raises:
        ValueError: if no venue, or more than one venue, has that name.
    """
    cur.execute("SELECT id FROM venues WHERE name = %s", (name, ))
    # Two rows are enough to tell "unique" apart from "ambiguous".
    matches = cur.fetchmany(2)
    if not matches:
        raise ValueError(f"Didn't find venue with name {name}")
    if len(matches) > 1:
        raise ValueError(f"Found more than 1 venue with name {name}")
    (venue_id, *_), = matches
    return venue_id
def empty_all_tables(cursor: psycopg2.extensions.cursor) -> None:
    """Using `cursor`, TRUNCATE each of the stage, error and prepared tables
    listed below, one statement per table."""
    statements = [
        f'TRUNCATE TABLE {qualified_name};'
        for qualified_name in (
            'stage.game_data',
            'error.game_data',
            'prepared.game_data',
            'stage.player_blobs',
            'stage.player_info',
            'error.player_info',
            'prepared.player_info',
        )
    ]
    for statement in statements:
        cursor.execute(statement)
def iine_country(cur: psycopg2.extensions.cursor, country: str) -> int:
    """Return the id of ``country``, inserting it first if it is not there.

    Args:
        cur: psycopg2 cursor; its connection is committed after an insert.
        country: value matched against / stored in ``countries.name``.

    Returns:
        The integer id of the existing or newly inserted row.
    """
    cur.execute("select id from countries where name=%s", (country, ))
    row = cur.fetchone()
    if row is None:
        cur.execute("insert into countries (name) values (%s) returning id",
                    (country, ))
        row = cur.fetchone()
        cur.connection.commit()
    # fetchone() yields a one-column row; unwrap it so the result really is
    # the int the signature promises rather than a tuple.
    return row[0]
def _get_id_list(cur: psycopg2.extensions.cursor, column: str, table: str, table_val: str, val: str): """Return a stringified list of ids for the given table linked to the business_id.""" val = val.replace('(', '').replace(')', '') cur.execute(f'select {column} from {table} where {table_val} in ({val})') id_list = [] for _id in cur.fetchall(): id_list.append(_id[0]) return str(id_list).replace('[', '(').replace(']', ')')
def iine_person_type(cur: psycopg2.extensions.cursor, actor_type: str) -> int:
    """Return the id of ``actor_type``, inserting it first if it is not there.

    Args:
        cur: psycopg2 cursor; its connection is committed after an insert.
        actor_type: value matched against / stored in ``person_types.type``.

    Returns:
        The integer id of the existing or newly inserted row.
    """
    cur.execute("select id from person_types where type=%s", (actor_type, ))
    row = cur.fetchone()
    if row is None:
        cur.execute("insert into person_types (type) values (%s) returning id",
                    (actor_type, ))
        row = cur.fetchone()
        cur.connection.commit()
    # fetchone() yields a one-column row; unwrap it so the result really is
    # the int the signature promises rather than a tuple.
    return row[0]
def dbExec(self, cur: psycopg2.extensions.cursor, sql: str, args: list) -> bool:
    """Execute one statement and commit it.

    Args:
        cur: cursor used to run the statement and the COMMIT/ROLLBACK.
        sql: statement to execute.
        args: parameters bound to ``sql``.

    Returns:
        True when the statement and the COMMIT both succeed; False when either
        raises (the exception is logged, not propagated).
    """
    try:
        self.logger.debug('dbExec %s %s', sql, args)
        cur.execute(sql, args)
        cur.execute('COMMIT;')
        return True
    except Exception:
        self.logger.exception('Unable to execute %s %s', sql, args)
        # Leave the session usable: without a rollback an open transaction
        # stays aborted and every later statement on it is rejected.
        try:
            cur.execute('ROLLBACK;')
        except Exception:
            self.logger.exception('Unable to roll back after %s %s', sql, args)
        return False
def head_table(table: str, cur: psycopg2.extensions.cursor, n: int = 5):
    """
    Return the n-first rows of a given table.

    ``table`` and ``n`` are interpolated directly into the SQL text, so both
    must come from trusted code. If psycopg2 reports an error it is printed and
    None is returned.
    """
    query = f"SELECT * FROM {table} LIMIT {n}"
    try:
        cur.execute(query)
        rows = list(cur.fetchall())
    except psycopg2.Error as e:
        print(f"Error: {e}")
        return None
    return rows
def create_product_table(cur: psycopg2.extensions.cursor) -> None:
    """
    Create the products table, named by the module-level ``DB_TABLE``.

    :param cur: cursor the CREATE TABLE statement is executed on
    :return: None
    """
    column_definitions = ", ".join([
        "id SERIAL PRIMARY KEY",
        "name VARCHAR(10485760) UNIQUE",
        "description VARCHAR(10485760)",
        "digital BOOLEAN default False",
        "price FLOAT default 0",
        "image VARCHAR",
        "quantity INTEGER default 0",
        "created_at timestamp default current_timestamp",
    ])
    statement = f"CREATE TABLE {DB_TABLE} ({column_definitions})"
    cur.execute(statement)
def insert_player_blob(cursor: psycopg2.extensions.cursor, content_json: str) -> None:
    """
    Using `cursor`, insert `content_json` into the stage.player_blobs table.

    This functionality is split into its own function to enable using a test
    data file.
    """
    # Bind the JSON as a query parameter: formatted into the statement, any
    # apostrophe in the blob (e.g. a name like O'Neil) would end the SQL string
    # literal early and break or alter the INSERT.
    cursor.execute(
        "INSERT INTO stage.player_blobs(player_blob) VALUES (%s)",
        (content_json,),
    )