Exemplo n.º 1
0
 def _execute_statement(self, statement: str) -> None:
     """Run a single SQL statement against the Postgres engine.

     The statement executes in its own short-lived session. Any failure
     (execute or commit) is logged and rolled back rather than raised,
     because this is used for best-effort cleanup; the session is always
     closed.
     """
     cleanup_session = Session(bind=self.postgres_engine)
     try:
         cleanup_session.execute(statement)
         cleanup_session.commit()
     except Exception as e:
         # Best-effort: swallow the error after logging so cleanup
         # failures do not abort the caller.
         logging.warning("Failed to cleanup: %s", e)
         cleanup_session.rollback()
     finally:
         cleanup_session.close()
Exemplo n.º 2
0
 def _alter_session_variables(cls, session: Session) -> None:
     """Tune per-session planner settings for Postgres-backed sessions.

     Postgres' cost-based planner sometimes prefers a sequential scan over
     an index read, a heuristic tuned for spinning disks where random page
     access is expensive. Our instances run on SSDs, so lowering
     random_page_cost biases the planner toward index reads. Beyond raw
     speed, this matters for locking: a sequential read locks the whole
     table, while an index read locks only the queried predicate. See
     https://www.postgresql.org/docs/12/transaction-iso.html and
     https://stackoverflow.com/questions/42288808/why-does-postgresql-serializable-transaction-think-this-as-conflict.

     Non-Postgres dialects (e.g. SQLite in tests) are left untouched.

     TODO(#3928): Once defined in code, set this on the SQL instance itself
     instead of per session.
     """
     if session.bind.dialect.name != "postgresql":
         return
     session.execute("SET random_page_cost=1;")
Exemplo n.º 3
0
    def _fetch_most_recent_snapshots_for_entity_type(
        self,
        session: Session,
        master_class: Type,
        entity_ids: Set[int],
        schema: ModuleType,
    ) -> List[DatabaseEntity]:
        """Returns a list containing the most recent snapshot for each ID in
        |entity_ids| with type |master_class|
        """

        # An empty |entity_ids| would render as "IN ()" in the raw query
        # below, which SQLite (tests) accepts but Postgres (production)
        # rejects as a syntax error — the same issue guarded against for
        # |snapshot_ids| further down. Bail out before building any SQL.
        if not entity_ids:
            return []

        # Get name of historical table in database (as distinct from name of ORM
        # class representing historical table in code)
        history_table_class = _get_historical_class(master_class, schema)
        history_table_name = history_table_class.__table__.name
        history_table_primary_key_col_name = (
            history_table_class.get_primary_key_column_name())
        # See module assumption #2
        master_table_primary_key_col_name = master_class.get_primary_key_column_name(
        )
        # NOTE: these values are interpolated directly into the SQL string;
        # this is only safe because |entity_ids| is a set of ints.
        ids_list = ", ".join([str(entity_id) for entity_id in entity_ids])

        # Get snapshot IDs in a separate query. The subquery logic here is ugly
        # and easier to do as a raw string query than through the ORM query, but
        # the return type of a raw string query is just a collection of values
        # rather than an ORM model. Doing this step as a separate query enables
        # passing just the IDs to the second request, which allows proper ORM
        # models to be returned as a result.
        snapshot_ids_query = f"""
        SELECT
          history.{history_table_primary_key_col_name},
          history.{master_table_primary_key_col_name},
          history.valid_to
        FROM {history_table_name} history
        JOIN (
          SELECT 
            {master_table_primary_key_col_name}, 
            MAX(valid_from) AS valid_from
          FROM {history_table_name}
          WHERE {master_table_primary_key_col_name} IN ({ids_list})
          GROUP BY {master_table_primary_key_col_name}
        ) AS most_recent_valid_from
        ON history.{master_table_primary_key_col_name} = 
            most_recent_valid_from.{master_table_primary_key_col_name}
        WHERE history.valid_from = most_recent_valid_from.valid_from;
        """

        results = session.execute(text(snapshot_ids_query)).fetchall()

        # Use only results where valid_to is None to exclude any overlapping
        # non-open snapshots
        snapshot_ids = [
            snapshot_id for snapshot_id, _master_id, valid_to in results
            if valid_to is None
        ]

        # Removing the below early return will pass in tests but fail in
        # production, because SQLite allows "IN ()" but Postgres does not
        if not snapshot_ids:
            return []

        filter_statement = (
            "{historical_table}.{primary_key_column} IN ({ids_list})".format(
                historical_table=history_table_name,
                primary_key_column=history_table_class.
                get_primary_key_column_name(),
                ids_list=", ".join([str(snapshot_id) for snapshot_id in snapshot_ids]),
            ))

        return session.query(history_table_class).filter(
            text(filter_statement)).all()