def get_role_users(cursor: Cursor, role: Role) -> List[str]:
    """
    Return the names of all users that are members of the given role.

    The role name is taken from ``role[0]`` and bound as the single query
    parameter; membership is resolved through ``pg_auth_members``.

    :param cursor: cursor handed to the ``query`` helper and read afterwards
    :param role: role record whose first element is the role name
    :return: one user name per matching row
    """
    statement = (
        "SELECT u.usename FROM pg_user u, pg_auth_members m, pg_roles r "
        "WHERE u.usesysid = m.member AND m.roleid = r.oid AND r.rolname = %s"
    )
    role_name = role[0]
    query(cursor, statement, role_name)

    # Each row holds a single column: the user name.
    members = []
    for record in cursor.fetchall():
        members.append(record[0])
    return members
def get_list_field_type_tuples(schema_table: SchemaTable, cursor: extensions.cursor) -> List[Tuple[str, str]]:
    """
    Return ``(column_name, data_type)`` pairs for a table, in ordinal order.

    The pairs are read from ``information_schema.columns``. The type is
    whatever the ``data_type`` column holds; precision and scale of numeric
    types are not included for now.

    This works just as well when ``schema_table`` refers to a view, but not
    for a materialized view: those are not part of the SQL standard and so
    do not appear in the information schema.

    :param schema_table: the schema table to inspect (views are fine,
        materialized views are not)
    :param cursor: the cursor on which the query is executed
    :return: a list of tuples, each holding the field name and the sql type,
        ordered by ordinal position
    """
    statement = sql.SQL(
        " SELECT column_name, data_type FROM information_schema.columns "
        "WHERE table_schema = %s AND table_name = %s "
        "ORDER BY ordinal_position "
    )
    lookup_key = (schema_table.schema.string, schema_table.table.string)
    cursor.execute(statement, lookup_key)
    # TODO add precision and scale to a parenthetical for numeric types
    # TODO make this return List[Tuple[Field, SQLType]] instead of List[Tuple[str, str]]
    return cursor.fetchall()
def get_role_databases(cursor: Cursor, owner: Role) -> List[str]:
    """
    Return the names of the databases owned by the given user.

    The user name is taken from ``owner[0]`` and matched against
    ``pg_user.usename``; ownership comes from ``pg_database.datdba``.

    :param cursor: cursor handed to the ``query`` helper and read afterwards
    :param owner: role record whose first element is the user name
    :return: one database name per matching row (empty if the user owns none)
    """
    statement = (
        "SELECT datname FROM pg_database d, pg_user u "
        "WHERE d.datdba = u.usesysid AND u.usename = %s"
    )
    owner_name = owner[0]
    query(cursor, statement, owner_name)

    # Each row holds a single column: the database name.
    databases = []
    for record in cursor.fetchall():
        databases.append(record[0])
    return databases
def get_user_roles(cursor: Cursor, name: str) -> List[Role]:
    """
    Return every role the given user is a member of.

    The shared ``_ROLE_SELECT`` statement is extended with a
    ``pg_has_role(..., 'member')`` filter and the user name is bound as the
    only parameter.

    :param cursor: cursor handed to the ``query`` helper and read afterwards
    :param name: name of the user whose memberships are looked up
    :return: the rows exactly as returned by ``cursor.fetchall()``
    """
    statement = f"{_ROLE_SELECT} WHERE pg_has_role(%s, oid, 'member')"
    query(cursor, statement, name)
    memberships = cursor.fetchall()
    return memberships
def get_movies_by_ids(ids: List[str], cursor: _cursor) -> List[dict]:
    """
    Retrieve full movie data for the given film work ids.

    Each returned row aggregates the genres, person names, person roles and
    person ids linked to one film work.

    :param ids: film work ids to load; an empty list short-circuits to ``[]``
        without touching the database (``IN ()`` is not valid SQL)
    :param cursor: cursor on which the query is executed
    :return: the rows returned by ``cursor.fetchall()``, one per film work
    """
    if not ids:
        return []

    logger.debug(f"Looking for {len(ids)} movies")
    # The ids are bound as one tuple parameter; psycopg2 renders a tuple as a
    # parenthesised value list suitable for IN, so no SQL is built by hand.
    cursor.execute(
        """
        SELECT
            fw.id as fw_id,
            fw.title,
            fw.description,
            fw.rating,
            fw.created_at,
            fw.updated_at,
            array_agg(g.name) as genres,
            array_agg(p.full_name) as names,
            array_agg(pfw.role) as roles,
            array_agg(p.id) as persons_ids
        FROM content.film_work fw
        LEFT JOIN content.person_film_work pfw ON pfw.film_work_id = fw.id
        LEFT JOIN content.person p ON p.id = pfw.person_id
        LEFT JOIN content.genre_film_work gfw ON gfw.film_work_id = fw.id
        LEFT JOIN content.genre g ON g.id = gfw.genre_id
        WHERE fw.id IN %s
        GROUP BY fw_id;
        """,
        (tuple(ids), ))
    movies = cursor.fetchall()
    logger.debug(f"Found {len(movies)} movies by ids")
    return movies
def update_estimated_release(self, manga_id: int, *, cur: Cursor = NotImplemented) -> "Optional[DictRow]":
    """
    Recompute ``estimated_release`` for one manga from its latest chapter.

    The statement picks the chapter with the highest ``chapter_number`` (and,
    within it, the highest ``chapter_decimal``), takes the earliest
    ``release_date`` recorded for it and adds the manga's
    ``release_interval``. Manga whose ``release_interval`` is NULL are left
    untouched.

    Args:
        manga_id: id of the manga to update
        cur: database cursor used to run the statement

    Returns:
        The row produced by ``RETURNING`` (columns ``estimated_release`` and
        ``estimated_release_old``), or None when nothing was updated.
    """
    sql = (
        'WITH tmp AS (SELECT MAX(chapter_number) as chn FROM chapters WHERE manga_id=%(manga)s) '
        'UPDATE manga SET estimated_release=('
        ' SELECT MIN(release_date) FROM chapters '
        ' WHERE manga_id=%(manga)s AND '
        ' chapter_number=(SELECT chn FROM tmp) AND '
        ' chapter_decimal IS NOT DISTINCT FROM (SELECT MAX(chapter_decimal) FROM chapters WHERE manga_id= %(manga)s AND chapter_number=(SELECT chn FROM tmp))'
        ') + release_interval '
        'WHERE manga_id=%(manga)s AND release_interval IS NOT NULL '
        'RETURNING estimated_release, (SELECT estimated_release FROM manga WHERE manga_id=%(manga)s) as estimated_release_old'
    )

    cur.execute(sql, {'manga': manga_id})
    rows = cur.fetchall()
    if not rows:
        maintenance.warning(
            "Nothing updated because manga id doesn't exist or release_interval was NULL"
        )
        return None

    row = rows[0]
    maintenance.info(
        f'Set estimated release from {row["estimated_release_old"]} to {row["estimated_release"]}'
    )
    return row
def update_latest_chapter(self, data: Collection[Tuple[int, int, datetime]], *, cur: Cursor = NotImplemented) -> None:
    """
    Updates the latest chapter and next chapter estimates for the given manga
    that contain new chapters.

    Only entries whose ``latest_chapter`` is greater than the stored value
    (or whose stored value is NULL) are written. Entries whose manga id is
    not present in the ``manga`` table are skipped instead of raising
    ``KeyError``; the UPDATE could not match them anyway.

    Args:
        cur: Optional database cursor
        data: collection of tuples or lists [manga_id, latest_chapter, release_date]

    Returns:
        None
    """
    if not data:
        return

    format_ids = ','.join(['%s'] * len(data))
    sql = f'SELECT latest_chapter, manga_id FROM manga WHERE manga_id IN ({format_ids})'
    cur.execute(sql, [d[0] for d in data])
    rows = cur.fetchall()
    if not rows:
        return

    # Map manga_id -> currently stored latest_chapter
    stored = {r[1]: r[0] for r in rows}

    # Keep only known manga whose new chapter is actually newer
    data = [
        d for d in data
        if d[0] in stored and (stored[d[0]] is None or stored[d[0]] < d[1])
    ]
    if not data:
        return

    sql = 'UPDATE manga m SET latest_chapter=c.latest_chapter, estimated_release=c.release_date + release_interval FROM ' \
          ' (VALUES %s) as c(manga_id, latest_chapter, release_date) ' \
          'WHERE c.manga_id=m.manga_id'
    execute_values(cur, sql, data)
def sub_species(self, database_cursor: DatabaseCursor) -> List[Taxonomy]:
    """
    Collect every taxonomy of rank ``TaxonomyRank.SPECIES`` at or below this one.

    A recursive query walks from this taxonomy's id down through
    ``parent_id`` links; the taxonomy itself is part of the result when its
    own rank is ``TaxonomyRank.SPECIES``.

    Parameters
    ----------
    database_cursor : DatabaseCursor
        Database cursor

    Returns
    -------
    List[Taxonomy]
        One instance of this object's class per matching row
    """
    taxonomy_cls = self.__class__
    table = taxonomy_cls.TABLE_NAME
    statement = (
        "WITH RECURSIVE subtaxonomies AS ("
        "SELECT id, parent_id, name, rank "
        f"FROM {table} "
        "WHERE id = %s "
        "UNION "
        "SELECT t.id, t.parent_id, t.name, t.rank "
        f"FROM {table} t "
        "INNER JOIN subtaxonomies s ON s.id = t.parent_id "
        ") SELECT id, parent_id, name, rank FROM subtaxonomies WHERE rank = %s;"
    )
    bound_values = (self.id, TaxonomyRank.SPECIES.value)
    database_cursor.execute(statement, bound_values)

    # Row layout follows the final SELECT: id, parent_id, name, rank.
    species = []
    for record in database_cursor.fetchall():
        species.append(taxonomy_cls(record[0], record[1], record[2], record[3]))
    return species
def columns(pg_cur: cursor, table_schema: str, table_name: str, table_type: str = 'table', remove_pkey: bool = False, skip_columns: list = None) -> list:
    """
    Returns the list of columns of a table

    The schema and table names are passed to the database as bound
    parameters instead of being formatted into the SQL text.

    Parameters
    ----------
    pg_cur
        psycopg cursor
    table_schema
        the table_schema
    table_name
        the table
    table_type
        the type of table, i.e. view or table
    remove_pkey
        if True, the primary key is dropped
    skip_columns
        list of columns to be skipped (None means nothing is skipped)

    Raises
    ------
    InvalidSkipColumns
        if a column listed in skip_columns does not exist
    """
    assert table_type.lower() in ('table', 'view')

    if table_type.lower() == 'table':
        sql = """SELECT attname
                 FROM pg_attribute
                 WHERE attrelid = %s::regclass
                 AND attisdropped IS NOT TRUE
                 AND attnum > 0
                 ORDER BY attnum ASC"""
        # regclass takes the qualified name as one text value
        params = ('{s}.{t}'.format(s=table_schema, t=table_name),)
    else:
        sql = """
            SELECT c.column_name
            FROM information_schema.tables t
            LEFT JOIN information_schema.columns c
                ON t.table_schema = c.table_schema
                AND t.table_name = c.table_name
            WHERE table_type = 'VIEW'
                AND t.table_schema = %s
                AND t.table_name = %s
            ORDER BY ordinal_position"""
        params = (table_schema, table_name)

    pg_cur.execute(sql, params)
    # Drop empty names (the LEFT JOIN yields a NULL column for a column-less view)
    pg_fields = [field[0] for field in pg_cur.fetchall() if field[0]]

    for col in skip_columns or ():
        try:
            pg_fields.remove(col)
        except ValueError:
            raise InvalidSkipColumns(
                'Cannot skip unexisting column "{col}" in "{s}.{t}"'.format(
                    col=col, s=table_schema, t=table_name))

    if remove_pkey:
        pkey = primary_key(pg_cur, table_schema, table_name)
        pg_fields.remove(pkey)
    return pg_fields
def get_roles(cursor: Cursor, *names: str) -> List[Role]:
    """
    Look up existing roles by their names.

    The names tuple is bound as the single parameter of an ``IN`` filter
    appended to the shared ``_ROLE_SELECT`` statement. With no names given,
    no query is issued.

    :param cursor: cursor handed to the ``query`` helper and read afterwards
    :param names: role names to look up
    :return: one ``Role`` per row found; empty when no names were given
    """
    if names:
        statement = f"{_ROLE_SELECT} WHERE rolname IN %s"
        query(cursor, statement, names)
        found = []
        for record in cursor.fetchall():
            found.append(Role(record))
        return found
    return []
def fetch_all_records(schema_table: SchemaTable, cursor: extensions.cursor) -> List:
    """
    Run ``SELECT *`` against the given object and return every row at once.

    The whole result is held in memory, so this is recommended only for
    small objects.

    :param schema_table: the SchemaTable object to read in full
    :param cursor: the cursor on which the query is executed
    :return: everything ``cursor.fetchall()`` yields for the object
    """
    statement = sql.SQL(" SELECT * FROM {} ").format(schema_table)
    cursor.execute(statement)
    records = cursor.fetchall()
    return records
def fetch_sample_results(c: cursor) -> Dict[str, List[TestResult]]:
    """
    Group the rows pending on the cursor into ``TestResult`` lists per user.

    Every row must consist of exactly five values, in this order: user name,
    test result, user speed, infra and isp. The test result is stored as the
    ``ground_truth_rate`` of the created ``TestResult``.

    :param c: cursor whose already executed query is consumed via ``fetchall``
    :return: a ``defaultdict(list)`` mapping user name to its results
    """
    grouped = defaultdict(list)
    for user_name, test_result, user_speed, infra, isp in c.fetchall():
        sample = TestResult(user_name=user_name,
                            ground_truth_rate=test_result,
                            speed=user_speed,
                            infra=infra,
                            isp=isp)
        grouped[user_name].append(sample)
    return grouped
def get_column_names(schema_table: SchemaTable, cursor: extensions.cursor) -> List[str]:
    """
    List the column names of a table as recorded in the information schema.

    :param schema_table: the SchemaTable object whose columns are wanted
    :param cursor: the cursor on which the query is executed
    :return: all column names of the table, ordered by ordinal position
    """
    statement = (
        "SELECT column_name FROM information_schema.columns "
        "WHERE table_schema = %s AND table_name = %s ORDER BY ordinal_position"
    )
    lookup_key = (schema_table.schema.string, schema_table.table.string)
    cursor.execute(statement, lookup_key)

    # Every row is a one-element record holding the column name.
    names = []
    for record in cursor.fetchall():
        names.append(record[0])
    return names
def _fetch_updated_persons(cursor: _cursor, updated_after: datetime.datetime) -> List[dict]:
    """
    Fetch persons whose ``updated_at`` is later than the given moment.

    Rows are ordered by ``updated_at`` and capped at
    ``CONFIG.FETCH_FROM_PG_BY`` entries.

    :param cursor: cursor on which the query is executed
    :param updated_after: only persons updated strictly after this are returned
    :return: the fetched rows (id, updated_at, full_name)
    """
    statement = (
        " SELECT id, updated_at, full_name FROM content.person "
        "WHERE updated_at > %s ORDER BY updated_at "
        f"LIMIT {CONFIG.FETCH_FROM_PG_BY}; "
    )
    cursor.execute(statement, (updated_after, ))
    persons = cursor.fetchall()
    fetched = len(persons)
    logger.debug(f"Fetched {fetched} persons")
    return persons
def get_chapters(self, manga_id: int, service_id: int = None, *, limit: int = 100, cur: Cursor = NotImplemented) -> List[Chapter]:
    """
    Load chapters of a manga, optionally restricted to a single service.

    Args:
        manga_id: id of the manga whose chapters are fetched
        service_id: when given, only chapters of this service are returned
        limit: maximum number of rows requested from the database
        cur: database cursor used to run the query

    Returns:
        The fetched rows, each converted with ``Chapter.parse_obj``
    """
    args: Tuple
    if service_id is not None:
        sql = 'SELECT * FROM chapters WHERE manga_id=%s AND service_id=%s LIMIT %s'
        args = (manga_id, service_id, limit)
    else:
        sql = 'SELECT * FROM chapters WHERE manga_id=%s LIMIT %s'
        args = (manga_id, limit)

    cur.execute(sql, args)
    rows = cur.fetchall()
    parse = Chapter.parse_obj
    return [parse(row) for row in rows]
def _fetch_updated_movies(cursor: _cursor, updated_after: datetime.datetime) -> List[dict]:
    """
    Fetch film works whose ``updated_at`` is later than the given moment.

    Rows are ordered by ``updated_at`` and capped at
    ``CONFIG.FETCH_FROM_PG_BY`` entries.

    :param cursor: cursor on which the query is executed
    :param updated_after: only movies updated strictly after this are returned
    :return: the fetched rows (id, updated_at)
    """
    statement = (
        " SELECT id, updated_at FROM content.film_work "
        "WHERE updated_at > %s ORDER BY updated_at "
        f"LIMIT {CONFIG.FETCH_FROM_PG_BY}; "
    )
    cursor.execute(statement, (updated_after, ))
    movies = cursor.fetchall()
    fetched = len(movies)
    logger.debug(f"Fetched {fetched} linked movies")
    return movies
def _get_table_data(table: str, sql: str, cur: cursor, ignore_cache=False) -> TableData:
    """
    Load the rows of a query, using a pickle file as an on-disk cache.

    The cache file lives at ``{TP_DIR}/{table}.pickle``. On a cache hit the
    stored data is returned with ``from_cache`` set to True and the database
    is not queried. Otherwise the query is executed, the result is written
    to the cache file and returned.

    NOTE: the cache path depends on ``table`` only, so a cached entry is
    returned even when ``sql`` differs from the query that produced it.

    :param table: name used for the cache file and stored in the result
    :param sql: query to execute on a cache miss
    :param cur: cursor on which the query is executed
    :param ignore_cache: when True, skip reading the cache (it is still rewritten)
    :return: dict with keys ``sql``, ``table``, ``from_cache``, ``columns``
        and ``rows`` (one dict per row, keyed by column name)
    """
    data: TableData = {"sql": sql, "table": table, "from_cache": False}
    cache_path = f"{TP_DIR}/{table}.pickle"

    if not ignore_cache and os.path.exists(cache_path):
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # safe as long as the cache directory is trusted.
        with open(cache_path, "rb") as cached:
            data = pickle.load(cached)
        data["from_cache"] = True
        logger.info(f"found cache for {table}")
        return data

    cur.execute(sql)
    data["columns"] = [col.name for col in cur.description]
    data["rows"] = [dict(zip(data["columns"], row)) for row in cur.fetchall()]
    with open(cache_path, "wb") as out:
        pickle.dump(data, out)
    return data
def execute(self, sql: str, args: Sequence[Any] = None, *, fetch: bool = None, cur: Cursor = NotImplemented) -> List[DictRow]:
    """
    Easy way for tests to call sql functions.
    Should not be used outside of tests.

    Args:
        sql: statement to execute
        args: optional statement parameters; a non-empty sequence is copied
            into a list before being passed to the cursor
        fetch: whether to fetch and return rows. When None, rows are fetched
            only if the statement starts with SELECT (case-insensitive,
            leading whitespace ignored)
        cur: database cursor used to run the statement

    Returns:
        The fetched rows, or an empty list when nothing is fetched
    """
    if fetch is None:
        # lstrip so statements written as indented/multi-line strings are detected
        fetch = sql.lstrip().upper().startswith('SELECT')

    if args:
        args = list(args)

    cur.execute(sql, args)
    if fetch:
        return cur.fetchall()
    return []
def _fetch_updated_genres(cursor: _cursor, updated_after: datetime.datetime) -> List[dict]:
    """
    Fetch genres whose ``updated_at`` is later than the given moment.

    Rows are ordered by ``updated_at`` and capped at
    ``CONFIG.FETCH_FROM_PG_BY`` entries.

    :param cursor: cursor on which the query is executed
    :param updated_after: only genres updated strictly after this are returned
    :return: the fetched rows (id, name, description, updated_at)
    """
    statement = (
        " SELECT id, name, description, updated_at FROM content.genre "
        "WHERE updated_at > %s ORDER BY updated_at "
        f"LIMIT {CONFIG.FETCH_FROM_PG_BY}; "
    )
    cursor.execute(statement, (updated_after, ))
    genres = cursor.fetchall()
    fetched = len(genres)
    logger.debug(f"Fetched {fetched} genres")
    return genres
def _fetch_movies_by_persons(cursor: _cursor, persons: List[dict], updated_after: datetime.datetime) -> List[dict]:
    """
    Extracts movies where provided persons participate.
    Also filters movies by updated_at.

    :param cursor: cursor on which the query is executed
    :param persons: person records; only their ``"id"`` value is used. An
        empty list short-circuits to ``[]`` (``IN ()`` is not valid SQL)
    :param updated_after: only movies updated strictly after this are returned
    :return: the fetched rows (id, updated_at), ordered by ``updated_at`` and
        capped at ``CONFIG.FETCH_FROM_PG_BY`` entries
    """
    if not persons:
        return []

    # Bound as one tuple parameter; psycopg2 renders it as an IN value list.
    person_ids = tuple(person["id"] for person in persons)
    cursor.execute(
        f"""
        SELECT fw.id, fw.updated_at
        FROM content.film_work fw
        LEFT JOIN content.person_film_work pfw ON pfw.film_work_id = fw.id
        WHERE fw.updated_at > %s AND pfw.person_id IN %s
        ORDER BY fw.updated_at
        LIMIT {CONFIG.FETCH_FROM_PG_BY};
        """,
        (updated_after, person_ids))
    linked_movies = cursor.fetchall()
    logger.debug(f"Fetched {len(linked_movies)} linked movies")
    return linked_movies
def _fetch_movies_by_genres(
        cursor: _cursor, genres: List[dict],
        movie_updated_after: datetime.datetime) -> List[dict]:
    """
    Returns all movies related to provided genres list.
    Also filters movies by provided updated_at field.

    :param cursor: cursor on which the query is executed
    :param genres: genre records; only their ``"id"`` value is used. An
        empty list short-circuits to ``[]`` (``IN ()`` is not valid SQL)
    :param movie_updated_after: only movies updated strictly after this are
        returned
    :return: the fetched rows (id, updated_at), ordered by ``updated_at`` and
        capped at ``CONFIG.FETCH_FROM_PG_BY`` entries
    """
    if not genres:
        return []

    # Bound as one tuple parameter; psycopg2 renders it as an IN value list.
    genre_ids = tuple(genre["id"] for genre in genres)
    cursor.execute(
        f"""
        SELECT fw.id, fw.updated_at
        FROM content.film_work fw
        LEFT JOIN content.genre_film_work gfw ON gfw.film_work_id = fw.id
        WHERE fw.updated_at > %s AND gfw.genre_id IN %s
        ORDER BY fw.updated_at
        LIMIT {CONFIG.FETCH_FROM_PG_BY};
        """,
        (movie_updated_after, genre_ids))
    linked_movies = cursor.fetchall()
    logger.debug(f"Fetched {len(linked_movies)} linked movies")
    return linked_movies
def _query(sql_stmt, params, db_cursor: cursor) -> List:
    """
    Wrapper for running queries that will automatically handle errors in a
    consistent manner.

    :param sql_stmt: statement text, wrapped in ``sql.SQL`` before execution
    :param params: parameters passed to ``execute`` alongside the statement
    :param db_cursor: cursor on which the statement is executed
    :return: the result of ``fetchall()``, or an empty list when fetching
        raises ``ProgrammingError``
    :raises DbError: when execution raises ``ProgrammingError`` or
        ``DataError``; the original exception is logged and attached as the
        cause
    """
    try:
        db_cursor.execute(sql.SQL(sql_stmt), params)
    except (ProgrammingError, DataError) as err:
        LOGGER.exception(f"database error - {err}")
        raise DbError("Internal database error, please contact LP DAAC User Services") from err

    try:
        return db_cursor.fetchall()
    except ProgrammingError:
        # no results, return an empty list
        return []
def manga_id_from_title(self, manga_title: str, service_id: int = None, *, cur: Cursor = NotImplemented) -> Optional[int]:
    """
    Resolve a manga id from its title using a case-insensitive match.

    When a service id is given, manga that already have an entry for that
    service in ``manga_service`` are excluded. An ambiguous title (more than
    one match) is logged as a warning and treated as not found.

    Args:
        manga_title: Title of the manga
        service_id: Optional id of the service
        cur: Optional cursor to use

    Returns:
        The id of the single manga matching the title, or None when there
        were no matches or more than one
    """
    args: Tuple
    if service_id is not None:
        sql = 'SELECT m.manga_id FROM manga m ' \
              'LEFT JOIN manga_service ms ON ms.service_id=%s AND ms.manga_id=m.manga_id ' \
              'WHERE ms.service_id IS NULL AND LOWER(m.title)=LOWER(%s) LIMIT 2'
        args = (service_id, manga_title)
    else:
        sql = 'SELECT manga_id FROM manga WHERE LOWER(title)=LOWER(%s) LIMIT 2'
        args = (manga_title, )

    cur.execute(sql, args)
    matches = cur.fetchall()

    # LIMIT 2 is enough to tell "unique" apart from "ambiguous".
    if len(matches) > 1:
        logger.warning(f'Multiple matches for manga\n{matches}')
        return None

    if matches:
        return matches[0][0]
    return None