Пример #1
0
 def get_manga_artists(self,
                       manga_id: int,
                       *,
                       cur: Cursor = NotImplemented) -> List[MangaArtist]:
     sql = 'SELECT * FROM manga_artists WHERE manga_id=%s'
     cur.execute(sql, (manga_id, ))
     return list(map(MangaArtist.parse_obj, cur))
Пример #2
0
    def sub_species(self, database_cursor: DatabaseCursor) -> List[Taxonomy]:
        """
        Returns all sub taxonomies with rank TaxonomyRank.SPECIES including itself if itself has rank TaxonomyRank.SPECIES.

        Parameters
        ----------
        database_cursor : DatabaseCursor
            Database cursor

        Returns
        -------
        List[Taxonomy]
            List of taxonomies including the self
        """
        recursive_subspecies_id_query = (
            "WITH RECURSIVE subtaxonomies AS ("
            "SELECT id, parent_id, name, rank "
            f"FROM {self.__class__.TABLE_NAME} "
            "WHERE id = %s "
            "UNION "
            "SELECT t.id, t.parent_id, t.name, t.rank "
            f"FROM {self.__class__.TABLE_NAME} t "
            "INNER JOIN subtaxonomies s ON s.id = t.parent_id "
            f") SELECT id, parent_id, name, rank FROM subtaxonomies WHERE rank = %s;"
        )
        database_cursor.execute(recursive_subspecies_id_query,
                                (self.id, TaxonomyRank.SPECIES.value))
        return [
            self.__class__(row[0], row[1], row[2], row[3])
            for row in database_cursor.fetchall()
        ]
Пример #3
0
def execute_operation(cur: cursor,
                      conn: connection,
                      commands: List[str],
                      verbose: bool = True) -> None:
    """
    Execute database operation.

    Parameters
    ----------
    cur : psycopg2.extensions.cursor
        Database cursor.
    conn : psycopg2.extensions.connection
        Database connection.
    commands : list[str]
        Commands or queries to be executed in the database.
    verbose : bool, optional
        Whether to be verbose.
    """
    for c in commands:
        if verbose:
            print(80 * '-')
            print(f'[{datetime.now()}] Running...')
            print(c)
        cur.execute(c)
        conn.commit()
        if verbose:
            print('Done!')
Пример #4
0
def get_list_field_type_tuples(schema_table: SchemaTable, cursor: extensions.cursor) -> List[Tuple[str, str]]:
    """
    Takes a schema table and a cursor and returns a list of tuples with the str field
    name and the sqltype in the proper ordinal order (which it gets by querying the information_schema).

    Note that the type is simply whatever is in the data_type field, and as of now, this does not use the precision
    and scale for numeric types.

    Note that this works equally well if the schema_table actually refers to a view, but it won't work with a
    materialized view since they aren't part of the SQL standard (so they aren't in the information schema)

    :param schema_table: the schema table to use (can also be views, but not materialized views)
    :param cursor: the cursor for where to execute this query
    :return: a list of tuple of strings, each one containing the field name and the sql type in ordinal order
    """
    schema_name = schema_table.schema.string
    table_name = schema_table.table.string

    cursor.execute(sql.SQL("""
            SELECT column_name, data_type FROM information_schema.columns
            WHERE table_schema = %s AND table_name = %s ORDER BY ordinal_position
              """), (schema_name, table_name))

    # TODO add precision and scale to a parenthetical for numeric types
    # TODO make this return List[Tuple[Field, SQLType] instead of List[str, str]
    return cursor.fetchall()
Пример #5
0
def get_movies_by_ids(ids: List[str], cursor: _cursor) -> List[dict]:
    """
    Retrieves full movies data.
    """
    logger.debug(f"Looking for {len(ids)} movies")
    args = ",".join(cursor.mogrify("%s", (_id, )).decode() for _id in ids)
    cursor.execute(f"""
    SELECT
        fw.id as fw_id, 
        fw.title, 
        fw.description, 
        fw.rating, 
        fw.created_at, 
        fw.updated_at, 
        array_agg(g.name) as genres,
        array_agg(p.full_name) as names,
        array_agg(pfw.role) as roles,
        array_agg(p.id) as persons_ids
    FROM content.film_work fw
    LEFT JOIN content.person_film_work pfw ON pfw.film_work_id = fw.id
    LEFT JOIN content.person p ON p.id = pfw.person_id
    LEFT JOIN content.genre_film_work gfw ON gfw.film_work_id = fw.id
    LEFT JOIN content.genre g ON g.id = gfw.genre_id
    WHERE fw.id IN ({args})
    GROUP BY fw_id;
    """)
    movies = cursor.fetchall()
    logger.debug(f"Found {len(movies)} movies by ids")
    return movies
Пример #6
0
def process_song_file(cur: cursor, filepath: str):
    """Process a song file

    Args:
        cur (cursor): Conected cursor
        filepath (str): Path for a json file
    """
    # open song file
    df = pd.read_json(filepath, lines=True)

    # insert song record
    song_data = list(
        df.loc[0,
               ["song_id", "title", "artist_id", "year", "duration"]].values)
    song_data = [str(col) for col in song_data]
    cur.execute(sql.song_table_insert, song_data)

    # insert artist record
    cols = [
        "artist_id", "artist_name", "artist_location", "artist_latitude",
        "artist_longitude"
    ]
    artist_data = list(df.loc[0, cols].values)

    # converting all cells to str
    artist_data = [str(col) for col in artist_data]
    cur.execute(sql.artist_table_insert, artist_data)
Пример #7
0
    def update_estimated_release(self,
                                 manga_id: int,
                                 *,
                                 cur: Cursor = NotImplemented) -> None:
        sql = 'WITH tmp AS (SELECT MAX(chapter_number) as chn FROM chapters WHERE manga_id=%(manga)s) ' \
              'UPDATE manga SET estimated_release=(' \
              ' SELECT MIN(release_date) FROM chapters ' \
              '     WHERE manga_id=%(manga)s AND ' \
              '           chapter_number=(SELECT chn FROM tmp) AND ' \
              '           chapter_decimal IS NOT DISTINCT FROM (SELECT MAX(chapter_decimal) FROM chapters WHERE manga_id= %(manga)s AND chapter_number=(SELECT chn FROM tmp))' \
              ') + release_interval ' \
              'WHERE manga_id=%(manga)s AND release_interval IS NOT NULL ' \
              'RETURNING estimated_release, (SELECT estimated_release FROM manga WHERE manga_id=%(manga)s) as estimated_release_old'

        cur.execute(sql, {'manga': manga_id})
        rows = cur.fetchall()
        if not rows:
            maintenance.warning(
                "Nothing updated because manga id doesn't exist or release_interval was NULL"
            )
            return

        row = rows[0]
        maintenance.info(
            f'Set estimated release from {row["estimated_release_old"]} to {row["estimated_release"]}'
        )
        return row
Пример #8
0
    def update_latest_chapter(self,
                              data: Collection[Tuple[int, int, datetime]],
                              *,
                              cur: Cursor = NotImplemented) -> None:
        """
        Updates the latest chapter and next chapter estimates for the given manga that contain new chapters
        Args:
            cur: Optional database cursor
            data: iterable of tuples or lists [manga_id, latest_chapter, release_date]

        Returns:
            None
        """
        if not data:
            return

        format_ids = ','.join(['%s'] * len(data))
        sql = f'SELECT latest_chapter, manga_id FROM manga WHERE manga_id IN ({format_ids})'
        cur.execute(sql, [d[0] for d in data])
        rows = cur.fetchall()
        if not rows:
            return

        # Filter latest chapters
        rows = {r[1]: r[0] for r in rows}
        data = [d for d in data if rows[d[0]] is None or rows[d[0]] < d[1]]
        if not data:
            return

        sql = 'UPDATE manga m SET latest_chapter=c.latest_chapter, estimated_release=c.release_date + release_interval FROM ' \
              ' (VALUES %s) as c(manga_id, latest_chapter, release_date) ' \
              'WHERE c.manga_id=m.manga_id'
        execute_values(cur, sql, data)
Пример #9
0
    def get_only_latest_entries(
            self,
            service_id: int,
            entries: Collection[BaseChapter],
            manga_id: int = None,
            limit: int = 400,
            *,
            cur: Cursor = NotImplemented) -> Collection[BaseChapter]:
        if len(entries) > 200:
            logger.warning(
                'Over 200 entries passed to get_only_latest_entries')

        args: Tuple = tuple(c.chapter_identifier for c in entries)
        format_args = ','.join(('%s', ) * len(args))

        if manga_id:
            sql = 'SELECT chapter_identifier FROM chapters ' \
                  f'WHERE service_id=%s AND manga_id=%s AND chapter_identifier IN ({format_args})'
            args = (service_id, manga_id, *args)
        else:
            sql = 'SELECT chapter_identifier FROM chapters ' \
                  f'WHERE service_id=%s AND chapter_identifier IN ({format_args})'
            args = (service_id, *args)

        try:
            cur.execute(sql, args)

            return set(entries).difference(set(r[0] for r in cur))

        except:
            logger.exception('Failed to get old chapters')
            return list(entries)
Пример #10
0
def _execute_file(migration_file: MigrationFile, curs: cursor):
    with open(migration_file.path) as stream:
        curs.execute(stream.read())

    t = """INSERT INTO arctic_tern_migrations VALUES (%s, %s, %s, now())"""
    curs.execute(
        t, [migration_file.stamp, migration_file.name, migration_file.hash_])
def find_parent(cursor: extensions.cursor, named_location_id: int,
                parents: Dict[str, str]):
    """
    Recursively search for the site.

    :param cursor: A database cursor object.
    :param named_location_id: The named location ID.
    :param parents: Collection to append to.
    """
    sql = '''
        select
            prnt_nam_locn_id, nam_locn.nam_locn_name, type.type_name
        from
            nam_locn_tree
        join
            nam_locn on nam_locn.nam_locn_id = nam_locn_tree.prnt_nam_locn_id
        join
            type on type.type_id = nam_locn.type_id
        where
            chld_nam_locn_id = %s
    '''
    cursor.execute(sql, [named_location_id])
    row = cursor.fetchone()
    if row is not None:
        parent_id = row[0]
        name = row[1]
        type_name = row[2]
        if type_name.lower() == 'site':
            parents['site'] = name
        if type_name.lower() == 'domain':
            parents['domain'] = name
        find_parent(cursor, parent_id, parents)
Пример #12
0
def primary_key(pg_cur: cursor, schema_name: str, table_name: str) -> str:
    """
    Returns the primary of a table

    Parameters
    ----------
    pg_cur
        psycopg cursor
    schema_name
        the schema name
    table_name
        the table name
    """
    sql = "SELECT c.column_name"\
          " FROM information_schema.key_column_usage AS c "\
          " LEFT JOIN information_schema.table_constraints AS t"\
          " ON t.constraint_name = c.constraint_name"\
          " WHERE t.table_name = '{t}'"\
          " AND t.table_schema = '{s}'"\
          " AND t.constraint_type = 'PRIMARY KEY'".format(s=schema_name, t=table_name)
    pg_cur.execute(sql)
    try:
        pkey = pg_cur.fetchone()[0]
    except Exception:
        raise TableHasNoPrimaryKey(sql)
    return pkey
Пример #13
0
def default_value(pg_cur: cursor, table_schema: str, table_name: str,
                  column: str) -> str:
    """
    Returns the default value of the column

    Parameters
    ----------
    pg_cur
        the psycopg cursor
    table_schema
        the table schema
    table_name
        the table name
    column
        the column name
    """
    # see https://stackoverflow.com/a/8148177/1548052

    sql = "SELECT pg_get_expr(d.adbin, d.adrelid) AS default_value\n" \
          "FROM pg_catalog.pg_attribute a\n" \
          "LEFT JOIN pg_catalog.pg_attrdef d ON (a.attrelid, a.attnum) = (d.adrelid,  d.adnum)\n" \
          "WHERE  NOT a.attisdropped   -- no dropped (dead) columns\n" \
          "AND    a.attnum > 0         -- no system columns\n" \
          "AND    a.attrelid = '{ts}.{tn}'::regclass\n" \
          "AND    a.attname = '{col}';" \
        .format(ts=table_schema,
                tn=table_name,
                col=column)
    pg_cur.execute(sql)
    return pg_cur.fetchone()[0] or 'NULL'
Пример #14
0
def geometry_type(pg_cur: cursor,
                  table_schema: str,
                  table_name: str,
                  column: str = 'geometry') -> (str, int):
    """
    Returns the geometry type of a column as a tuple (type, srid)

    Parameters
    ----------
    pg_cur
        the psycopg cursor
    table_schema
        the table schema
    table_name
        the table name
    column:
        the geometry column name, defaults to "geometry"
    """
    sql = "SELECT type, srid " \
          "FROM geometry_columns " \
          "WHERE f_table_schema = '{s}' " \
          "AND f_table_name = '{t}' " \
          "AND f_geometry_column = '{c}';".format(s=table_schema,
                                                  t=table_name,
                                                  c=column)
    pg_cur.execute(sql)
    res = pg_cur.fetchone()
    if res:
        return res[0], res[1]
    else:
        return None
Пример #15
0
def test_tables(cur: psycopg2Ext.cursor, conn: psycopg2Ext.connection) -> None:
    """
    Description: Test table status to make sure tables exists.

    Arguments:
        cur (psycopg2Ext.cursor): cursor object
        conn (psycopg2Ext.connection): connection object

    Returns:
        None
    """
    print("\n==================== TEST -- table status  ====================")

    for query in create_table_queries:
        tbl_name = query[query.find("EXISTS") +
                         len("EXISTS"):query.find("(")].strip()
        query = f"""select exists(select * from information_schema.tables
            where table_name='{tbl_name}')"""

        try:
            cur.execute(query)
        except psycopg2.Error as e:
            msg = f"ERROR: Could not retrieve table info with query: {query}"
            logger.warning(msg, e)
            return
        conn.commit()

        try:
            tbl_status = cur.fetchone()[0]
        except psycopg2.Error as e:
            msg = f"ERROR: Could not fetch table status for table: {tbl_name}"
            logger.warning(msg, e)
            return

        print(f"Table '{tbl_name}' exists status: {tbl_status}.")
Пример #16
0
def create_dist_schema(
    DWH_DB_USER: str,
    schema_name: str,
    cur: psycopg2Ext.cursor,
    conn: psycopg2Ext.connection,
) -> None:
    """
    Description: Create distribution schema and set search path
        to the schema name.

    Arguments:
        DWH_DB_USER (str): db user name to restrict authorization
        schema_name (str): schema
        cur (psycopg2Ext.cursor): cursor object
        conn (psycopg2Ext.connection): connection object

    Returns:
        None
    """

    queries = [
        f"DROP SCHEMA IF EXISTS {schema_name} CASCADE;",
        f"CREATE SCHEMA IF NOT EXISTS {schema_name} authorization {DWH_DB_USER };",
        f"SET search_path TO {schema_name};",
    ]

    for query in queries:
        try:
            cur.execute(query)
        except psycopg2.Error as e:
            msg = f"ERROR: Issue dropping/creating schema."
            logger.warning(msg, e)
            return
        conn.commit()
Пример #17
0
 def set_service_disabled_until(self,
                                service_id: int,
                                disabled_until: datetime,
                                *,
                                cur: Cursor = NotImplemented):
     sql = 'UPDATE services SET disabled_until=%s WHERE service_id=%s'
     cur.execute(sql, (disabled_until, service_id))
Пример #18
0
def upload_questions(questions: Questions, cursor: PostgresCursor) -> None:
    logger = log.get_logger()
    logger.info("Uploading questions")
    items = questions["items"]

    def iter_questions() -> Iterable[dict]:
        for question in items:
            yield {**question, "owner_id": question["owner"].get("user_id")}

    execute_batch(cursor, db.get_sql("upsert_question"), iter_questions(),
                  1000)

    logger.info("Uploading tags")
    tags = {(tag, ) for question in items for tag in question["tags"]}
    execute_batch(cursor, db.get_sql("upsert_tag"), tags)

    def iter_question_tags() -> Iterable[dict]:
        for question in items:
            question_id = question["question_id"]
            for tag in question["tags"]:
                yield {"question_id": question_id, "tag_id": tag}

    logger.info("Uploading question tags")
    delete, insert = db.get_sql_script("upsert_question_tag")
    question_ids = tuple({question["question_id"] for question in items})
    cursor.execute(delete, (question_ids, ))
    execute_batch(cursor, insert, iter_question_tags(), 10000)
Пример #19
0
 def manga_has_author(self,
                      manga_id: int,
                      *,
                      cur: Cursor = NotImplemented) -> bool:
     sql = 'SELECT EXISTS(SELECT 1 FROM manga_authors WHERE manga_id=%s) as "exists"'
     cur.execute(sql, (manga_id, ))
     return cur.fetchone()['exists']
Пример #20
0
def columns(pg_cur: cursor,
            table_schema: str,
            table_name: str,
            table_type: str = 'table',
            remove_pkey: bool = False,
            skip_columns: list = []) -> list:
    """
    Returns the list of columns of a table

    Parameters
    ----------
    pg_cur
        psycopg cursor
    table_schema
        the table_schema
    table_name
        the table
    table_type
        the type of table, i.e. view or table
    remove_pkey
        if True, the primary key is dropped
    skip_columns
        list of columns to be skipped
    """
    assert table_type.lower() in ('table', 'view')
    if table_type.lower() == 'table':
        sql = """SELECT attname
                    FROM pg_attribute 
                    WHERE attrelid = '{s}.{t}'::regclass
                    AND attisdropped IS NOT TRUE 
                    AND attnum > 0 
                    ORDER BY attnum ASC""".format(s=table_schema, t=table_name)
    else:
        sql = """
            SELECT c.column_name
                FROM information_schema.tables t
                    LEFT JOIN information_schema.columns c
                              ON t.table_schema = c.table_schema
                              AND t.table_name = c.table_name
                WHERE table_type = 'VIEW'
                      AND t.table_schema = '{s}'
                      AND t.table_name = '{t}'
                ORDER BY ordinal_position""".format(s=table_schema,
                                                    t=table_name)

    pg_cur.execute(sql)
    pg_fields = pg_cur.fetchall()
    pg_fields = [field[0] for field in pg_fields if field[0]]
    for col in skip_columns:
        try:
            pg_fields.remove(col)
        except ValueError:
            raise InvalidSkipColumns(
                'Cannot skip unexisting column "{col}" in "{s}.{t}"'.format(
                    col=col, s=table_schema, t=table_name))
    if remove_pkey:
        pkey = primary_key(pg_cur, table_schema, table_name)
        pg_fields.remove(pkey)
    return pg_fields
Пример #21
0
 def update_manga_next_update(self,
                              service_id: int,
                              manga_id: int,
                              next_update: datetime,
                              *,
                              cur: Cursor = NotImplemented) -> None:
     sql = 'UPDATE manga_service SET next_update=%s WHERE manga_id=%s AND service_id=%s'
     cur.execute(sql, (next_update, manga_id, service_id))
Пример #22
0
 def find_service_manga(self,
                        service_id: int,
                        title_id: str,
                        *,
                        cur: Cursor = NotImplemented) -> DictRow:
     sql = 'SELECT * from manga_service WHERE service_id=%s AND title_id=%s'
     cur.execute(sql, (service_id, title_id))
     return cur.fetchone()
Пример #23
0
 def set_manga_last_checked(self,
                            service_id: int,
                            manga_id: int,
                            last_checked: Optional[datetime],
                            *,
                            cur: Cursor = NotImplemented):
     sql = 'UPDATE manga_service SET last_check=%s WHERE manga_id=%s AND service_id=%s'
     cur.execute(sql, [last_checked, manga_id, service_id])
Пример #24
0
def query(cursor: Cursor, sql: str, *args: Union[str, Tuple[str, ...],
                                                 Password]) -> None:
    """
    Run a SQL query against a database cursor.
    """
    LOG.debug("Query: %r %% %r", sql, args)
    cursor.execute(
        sql, [str(arg) if isinstance(arg, Password) else arg for arg in args])
Пример #25
0
    def get_service_configs(self,
                            *,
                            cur: Cursor = NotImplemented
                            ) -> List[ServiceConfig]:
        sql = 'SELECT * FROM service_config'
        cur.execute(sql)

        return list(map(ServiceConfig.parse_obj, cur))
Пример #26
0
 def find_manga_by_title(self,
                         title: str,
                         *,
                         cur: Cursor = NotImplemented) -> Optional[Manga]:
     sql = 'SELECT * FROM manga WHERE title=%s LIMIT 1'
     cur.execute(sql, (title, ))
     row = cur.fetchone()
     return None if not row else Manga(**row)
Пример #27
0
def check_table(db: Cursor, table_name: str) -> bool:
    sql = """
    SELECT EXISTS (
        SELECT * FROM information_schema.tables
        WHERE table_name = %s
    );
    """
    db.execute(sql, (table_name, ))
    return db.fetchone()[0]
Пример #28
0
    def get_all_scheduled_runs(self,
                               *,
                               cur: Cursor = NotImplemented
                               ) -> List[ScheduledRunResult]:
        sql = 'SELECT sr.manga_id, sr.service_id, ms.title_id FROM scheduled_runs sr ' \
              'LEFT JOIN manga_service ms ON sr.manga_id = ms.manga_id AND sr.service_id = ms.service_id'

        cur.execute(sql)
        return list(map(ScheduledRunResult.parse_obj, cur))
Пример #29
0
def create_users_table(db: Cursor):
    sql = """
    CREATE TABLE IF NOT EXISTS users(
        id serial PRIMARY KEY,
        name VARCHAR(50),
        age INT
    )
    """
    db.execute(sql)
Пример #30
0
 def update_latest_release(self,
                           data: List[int],
                           *,
                           cur: Cursor = NotImplemented) -> None:
     format_ids = self.get_format_args(data)
     sql = 'UPDATE manga m SET latest_release=c.release_date FROM ' \
           f'(SELECT MAX(release_date), manga_id FROM chapters WHERE manga_id IN ({format_ids}) GROUP BY manga_id) as c(release_date, manga_id)' \
           'WHERE m.manga_id=c.manga_id'
     cur.execute(sql, data)