Пример #1
0
def _get_true_create_dates_for_unmatched_records(
    resource_name: str,
    con: sqlalchemy.engine.base.Connection,
):
    """
    Restore the original CreateDate on rows that are updates of existing records.

    For every row in Unmatched_<resource_name> flagged SyncNeeded = 1 whose
    SourceId is also present in the main <resource_name> table, CreateDate is
    overwritten with the value stored in the main table. Rows without a
    counterpart in the main table are left untouched.

    Note: the statement uses a correlated sub-select plus an EXISTS filter
        instead of UPDATE-FROM, because UPDATE-FROM is not available in
        sqlite before v3.33.0.

    Parameters
    ----------
    resource_name: str
        the name of the API resource, e.g. "Courses"; it is interpolated
        directly into the SQL as a table name
    con: sqlalchemy.engine.base.Connection
        an open database connection, which will not be closed by this function
    """
    unmatched_table = f"Unmatched_{resource_name}"
    restore_create_dates_sql = f"""
        UPDATE {unmatched_table}
            SET CreateDate = (
                SELECT c.CreateDate
                FROM {resource_name} c
                WHERE c.SourceId = {unmatched_table}.SourceId
            )
            WHERE EXISTS (
                SELECT *
                FROM {resource_name} c
                WHERE c.SourceId = {unmatched_table}.SourceId
            ) AND SyncNeeded = 1
        """
    con.execute(restore_create_dates_sql)
Пример #2
0
def create_entity_group_map(conn: sqlalchemy.engine.base.Connection):
    """Create the EntityGroupMap link table if it does not already exist.

    Each row pairs one entity with one group. The foreign keys reference the
    ``Entity`` and ``Group`` tables, which are the names the other
    table-creation functions in this file actually create; referencing
    non-existent parent tables makes every insert fail once SQLite foreign
    key enforcement is switched on.

    Because of ``IF NOT EXISTS``, a table that was already created with a
    different definition is left as it is.

    Parameters
    ----------
    conn: sqlalchemy.engine.base.Connection
        an open database connection; it is not closed by this function
    """
    # "Group" is an SQL keyword, so the parent table name must be quoted.
    conn.execute("""
            CREATE TABLE IF NOT EXISTS EntityGroupMap(
                EntityId INTEGER NOT NULL REFERENCES Entity(Id),
                GroupId INTEGER NOT NULL REFERENCES "Group"(Id)
            )
        """)
Пример #3
0
def create_groupings(conn: sqlalchemy.engine.base.Connection):
    """Create the Grouping table if it does not already exist.

    The table has an integer primary key ``Id`` and a mandatory ``Name``.

    Parameters
    ----------
    conn: sqlalchemy.engine.base.Connection
        an open database connection; it is not closed by this function
    """
    grouping_ddl = """
            CREATE TABLE IF NOT EXISTS Grouping(
                Id INTEGER NOT NULL PRIMARY KEY,
                Name TEXT NOT NULL
            )
        """
    conn.execute(grouping_ddl)
Пример #4
0
def create_enitity_types(conn: sqlalchemy.engine.base.Connection):
    """Create the EntityType table if it does not already exist.

    The table has an integer primary key ``Id`` plus mandatory ``Name`` and
    ``Description`` text columns.

    Parameters
    ----------
    conn: sqlalchemy.engine.base.Connection
        an open database connection; it is not closed by this function
    """
    entity_type_ddl = """
            CREATE TABLE IF NOT EXISTS EntityType(
                Id INTEGER NOT NULL PRIMARY KEY,
                Name TEXT NOT NULL,
                Description TEXT NOT NULL
            )
        """
    conn.execute(entity_type_ddl)
Пример #5
0
def create_entities(conn: sqlalchemy.engine.base.Connection):
    """Create the Entity table if it does not already exist.

    Columns: integer primary key ``Id``, a mandatory unique ``Hash``, a
    mandatory ``Type`` referencing ``EntityType(Id)``, and an optional
    ``Name``.

    Parameters
    ----------
    conn: sqlalchemy.engine.base.Connection
        an open database connection; it is not closed by this function
    """
    entity_ddl = """
            CREATE TABLE IF NOT EXISTS Entity(
                Id INTEGER NOT NULL PRIMARY KEY,
                Hash TEXT NOT NULL UNIQUE,
                Type INTEGER NOT NULL REFERENCES EntityType(Id),
                Name TEXT
            )
        """
    conn.execute(entity_ddl)
Пример #6
0
def create_group(conn: sqlalchemy.engine.base.Connection):
    """Create the Group table if it does not already exist.

    Columns: integer primary key ``Id``, a mandatory ``GroupingId``
    referencing ``Grouping(Id)``, a mandatory ``Name`` and an optional
    ``Description``. The table name is quoted in the statement because
    GROUP is an SQL keyword.

    Parameters
    ----------
    conn: sqlalchemy.engine.base.Connection
        an open database connection; it is not closed by this function
    """
    group_ddl = """
            CREATE TABLE IF NOT EXISTS 'Group'(
                Id INTEGER NOT NULL PRIMARY KEY,
                GroupingId INTEGER NOT NULL REFERENCES Grouping(Id),
                Name TEXT NOT NULL,
                Description TEXT
            )
        """
    conn.execute(group_ddl)
Пример #7
0
def insert_enitity_types(conn: sqlalchemy.engine.base.Connection):
    """Seed the EntityType table with the five built-in entity types.

    Uses ``INSERT OR REPLACE``, so running it again overwrites the rows with
    Ids 0-4 instead of failing on the primary key. The stored names and
    descriptions are written exactly as spelled in the statement.

    Parameters
    ----------
    conn: sqlalchemy.engine.base.Connection
        an open database connection; it is not closed by this function
    """
    seed_entity_types_sql = """
            INSERT OR REPLACE INTO EntityType (Id, Name, Description)
            VALUES
                (0, 'Unkown', 'Unkown file type'),
                (1, 'image', 'Single Image'),
                (2, 'gif', 'Single Gif'),
                (3, 'gallery', 'Multiple images/files in a directory treated as a single enitity'),
                (4, 'video', 'Single video');
        """
    conn.execute(seed_entity_types_sql)
Пример #8
0
def _create_unmatched_records_temp_table(
    resource_name: str,
    con: sqlalchemy.engine.base.Connection,
):
    """
    Rebuild the Unmatched_<resource_name> working table and its SourceId index.

    The table holds every (SourceId, Hash) pair that occurs exactly once in
    the union of <resource_name> and Sync_<resource_name>, i.e. records whose
    hash differs between the two tables or that exist in only one of them.
    An identity present in only one table yields a single row (add or
    missing), and its SyncNeeded flag indicates which table it came from.
    An identity present in both with different hashes yields two rows
    (update needed), and SyncNeeded tells them apart.

    Any previous index and table of the same name are dropped first.

    Parameters
    ----------
    resource_name: str
        the name of the API resource, e.g. "Courses"; it is interpolated
        directly into the SQL as part of table and index names
    con: sqlalchemy.engine.base.Connection
        an open database connection, which will not be closed by this function
    """
    index_name = f"ID_{resource_name}"
    unmatched_table = f"Unmatched_{resource_name}"

    build_unmatched_sql = f"""
        CREATE TABLE {unmatched_table} AS
        SELECT * FROM (
            SELECT * FROM {resource_name}
            UNION ALL
            SELECT * FROM Sync_{resource_name}
        )
        GROUP BY SourceId, Hash
        HAVING COUNT(*) = 1
        """

    # Order matters: clear out the old index and table, rebuild, then index.
    statements = (
        f"DROP INDEX IF EXISTS {index_name}",
        f"DROP TABLE IF EXISTS {unmatched_table}",
        build_unmatched_sql,
        f"CREATE INDEX IF NOT EXISTS {index_name} ON {unmatched_table}(SourceId)",
    )
    for statement in statements:
        con.execute(statement)
Пример #9
0
def delete(
    table: sqlalchemy.Table,
    connection: sqlalchemy.engine.base.Connection,
    logger: logging.Logger,
):
    """Delete all rows from a table.

    Useful to wipe a table before re-inserting fresh data in ETL jobs.
    The row count is queried before and after the deletion; both counts are
    reported through ``logger`` when a logger is supplied.

    Parameters
    ----------
    table: sqlalchemy.Table
        the table to empty
    connection: sqlalchemy.engine.base.Connection
        the connection used for the count queries and the delete statement
    logger: logging.Logger
        destination for progress messages; a falsy value disables logging
    """
    row_count_query = select([func.count()]).select_from(table)

    def current_row_count():
        # Single-row, single-column result: the COUNT value.
        return connection.execute(row_count_query).fetchall()[0][0]

    rows_before = current_row_count()
    if logger:
        logger.info(f"Found existing table {table.name} with {rows_before} rows.")
        logger.info(f"Deleting table {table.name}...")

    connection.execute(table.delete())

    rows_after = current_row_count()
    if logger:
        logger.info(f"Rows after deletion: {rows_after}.")
Пример #10
0
 def upsert(
     cls,
     conn: sa.engine.base.Connection,
     instances: Iterable["UpsertMixin"],
 ) -> None:
     """Insert each instance, updating the existing row on a primary-key conflict.

     For every instance one PostgreSQL ``INSERT ... ON CONFLICT`` statement is
     executed. Only columns whose attribute value is not ``None`` take part
     in the statement. On conflict, all of those columns except ``id`` are
     overwritten with the new values.

     If an instance has no column to update (for example only ``id`` is
     set), the statement falls back to ``ON CONFLICT DO NOTHING`` instead of
     raising, because ``on_conflict_do_update`` rejects an empty ``set_``.

     Parameters
     ----------
     conn: sa.engine.base.Connection
         the connection the statements are executed on
     instances: Iterable["UpsertMixin"]
         the objects to write; attributes are read by column key
     """
     table = cls.__table__
     column_keys = table.c.keys()
     conflict_target = table.primary_key.columns

     for instance in instances:
         # Read each attribute once and skip unset (None) values.
         data = {}
         for key in column_keys:
             value = getattr(instance, key)
             if value is not None:
                 data[key] = value

         statement = postgresql.insert(table).values(**data)
         updates = {key: value for key, value in data.items() if key != "id"}
         if updates:
             statement = statement.on_conflict_do_update(
                 index_elements=conflict_target,
                 set_=updates,
             )
         else:
             # Nothing to overwrite: keep the existing row untouched.
             statement = statement.on_conflict_do_nothing(
                 index_elements=conflict_target,
             )
         conn.execute(statement)
Пример #11
0
def _ensure_main_table_exists(
    resource_name: str,
    con: sqlalchemy.engine.base.Connection,
):
    """
    Ensure the main resource table and its SyncNeeded index exist.

    The SYNCNEEDED_<resource_name> index is dropped if present, the table is
    created when missing using the module-level SYNC_COLUMNS_SQL column
    definitions, and the index on SyncNeeded is then created again.

    Parameters
    ----------
    resource_name: str
        the name of the API resource, e.g. "Courses"; it is interpolated
        directly into the SQL as the table name and part of the index name
    con: sqlalchemy.engine.base.Connection
        an open database connection, which will not be closed by this function
    """
    index_name = f"SYNCNEEDED_{resource_name}"

    create_table_sql = f"""
        CREATE TABLE IF NOT EXISTS {resource_name} (
            {SYNC_COLUMNS_SQL}
        )
        """

    # Order matters: drop the index, make sure the table exists, re-index.
    statements = (
        f"DROP INDEX IF EXISTS {index_name}",
        create_table_sql,
        f"CREATE INDEX IF NOT EXISTS {index_name} ON {resource_name}(SyncNeeded)",
    )
    for statement in statements:
        con.execute(statement)
Пример #12
0
    def fetch(cls, conn: sqlalchemy.engine.base.Connection):
        result: sqlalchemy.engine.result.ResultProxy = conn.execute("""
SELECT DISTINCT
  kp.poradi as kp_poradi,
  p.poradi as p_poradi,
  ap.poradi as ap_poradi,
  z.id as z_id,
  z.prijmeni as z_prijmeni,
  z.jmeno as z_jmeno,
  (SELECT COUNT(zavodnikID) FROM vysledek WHERE zavodnikID=z.id) as nzavodu,
  GROUP_CONCAT(kp.kat) as kategorie,
  s.body as s_body,
  ap.scores as ap_scores
FROM zavodnik z, soucet s, absporadi ap, poradi p, katporadi kp
WHERE z.id=s.zavodnikID AND z.id=ap.zavodnikID AND z.id=p.zavodnikID AND z.id=kp.zavodnikID
GROUP BY z.id
ORDER BY ap.poradi ASC
""")
        return [cls(**row) for row in result]
    def fetch(cls, conn: sqlalchemy.engine.base.Connection):
        result: sqlalchemy.engine.result.ResultProxy = conn.execute("""
SELECT DISTINCT
  kp.poradi as kp_poradi,
  p.poradi as p_poradi,
  ap.poradi as ap_poradi,
  z.id as z_id,
  z.prijmeni as z_prijmeni,
  z.jmeno as z_jmeno,
  (SELECT COUNT(zavodnikID) FROM vysledek WHERE zavodnikID=z.id) as nzavodu,
  GROUP_CONCAT(kp.kat) as kategorie,
  s.body as s_body,
  ap.scores as ap_scores
FROM zavodnik z, soucet s, absporadi ap, poradi p, katporadi kp
WHERE z.id=s.zavodnikID AND z.id=ap.zavodnikID AND z.id=p.zavodnikID AND z.id=kp.zavodnikID
GROUP BY z.id
ORDER BY ap.poradi ASC
""")
        return [cls(**row) for row in result]
Пример #14
0
def _update_resource_table_with_changes(
    resource_name: str,
    con: sqlalchemy.engine.base.Connection,
):
    """
    Update main resource table with new and updated records

    Three statements are executed in order against the main table:

    1. delete the rows whose SourceId appears more than once in
       Unmatched_<resource_name> (changed records);
    2. insert the SyncNeeded = 1 rows from Unmatched_<resource_name> for
       both changed records and records that appear only once there with
       SyncNeeded = 1 (new records);
    3. reset SyncNeeded to 0 on every row of the main table.

    Parameters
    ----------
    resource_name: str
        the name of the API resource, e.g. "Courses"; it is interpolated
        directly into the SQL as a table name
    con: sqlalchemy.engine.base.Connection
        an open database connection, which will not be closed by this function
    """
    unmatched_table = f"Unmatched_{resource_name}"

    # CTE shared by the delete and insert statements: the SyncNeeded-side
    # row of every identity that occurs more than once in the unmatched set.
    changed_rows_cte = f"""
                        changedRows AS (
                            SELECT * FROM {unmatched_table}
                            WHERE (SourceId) IN (
                                SELECT SourceId FROM {unmatched_table}
                                GROUP BY SourceId
                                HAVING COUNT(*) > 1
                            ) AND SyncNeeded = 1
                        )
                        """

    # Removes the obsolete version of every changed record.
    delete_obsolete_sql = f"""
        WITH
        {changed_rows_cte}
        DELETE FROM {resource_name}
        WHERE (SourceId) IN (
            SELECT SourceId from changedRows
        )
        """

    # Adds the new version of changed records plus brand-new records; both
    # come from the SyncNeeded side of the unmatched set.
    insert_new_and_changed_sql = f"""
        WITH
            {changed_rows_cte},
            newRows AS (
                SELECT * FROM {unmatched_table}
                WHERE (SourceId) IN (
                    SELECT SourceId FROM {unmatched_table}
                    GROUP BY SourceId
                    HAVING COUNT(*) = 1 AND SyncNeeded = 1
                )
            )
        INSERT INTO {resource_name}
            SELECT * FROM {unmatched_table}
            WHERE (SourceId) IN (
                SELECT SourceId FROM changedRows
                UNION ALL
                SELECT SourceId FROM newRows
            ) AND SyncNeeded = 1
        """

    # Clears the SyncNeeded flag on the main table.
    reset_sync_flag_sql = f"""
        UPDATE {resource_name}
        SET SyncNeeded = 0
        WHERE SyncNeeded != 0
        """

    for statement in (
        delete_obsolete_sql,
        insert_new_and_changed_sql,
        reset_sync_flag_sql,
    ):
        con.execute(statement)
Пример #15
0
def _field_in_table(connection: sqlalchemy.engine.base.Connection,
                    table_name: str, field_name: str) -> bool:
    """Return true if a field already exists in a given table.

    The check counts the rows of SQLite's ``pragma_table_info`` for
    ``table_name`` whose ``name`` equals ``field_name``.

    Both names are interpolated directly into the SQL text, so they must be
    trusted identifiers, never user-supplied input.

    Parameters
    ----------
    connection: sqlalchemy.engine.base.Connection
        the connection the query is executed on
    table_name: str
        the table to inspect
    field_name: str
        the column name to look for

    Returns
    -------
    bool
        True when exactly one column of that name is reported
    """
    sql = f"select count(*) from pragma_table_info('{table_name}') where name='{field_name}';"
    # scalar() returns the first column of the first row and does not depend
    # on the legacy row ``values()`` accessor.
    return connection.execute(sql).scalar() == 1
Пример #16
0
def _add_field_if_not_exists(connection: sqlalchemy.engine.base.Connection,
                             table_name: str, field_name: str,
                             field_description: str):
    """Add a column to a table unless the table already has it.

    Parameters
    ----------
    connection: sqlalchemy.engine.base.Connection
        the connection the statements are executed on
    table_name: str
        the table to alter; interpolated directly into the SQL
    field_name: str
        the name of the column to add; interpolated directly into the SQL
    field_description: str
        the SQL text that follows the column name in the ALTER TABLE
        statement; interpolated directly into the SQL
    """
    if _field_in_table(connection, table_name, field_name):
        # Column is already there: nothing to do.
        return
    alter_statement = f"ALTER TABLE {table_name} ADD {field_name} {field_description};"
    connection.execute(alter_statement)