    def _commit(self, transaction_manager):
        """Commit the ongoing transaction, start a new one.

        Returns a fresh cursor for use in the new transaction.
        """
        transaction_manager.commit()
        transaction_manager.begin()
        cur = cursor()
        # Disable slow sequential scans.  The database server is reluctant
        # to use indexes on tables that undergo large changes, such as the
        # deletion of large numbers of rows in this case.  Usually it's
        # right, but in this case it seems to slow things down dramatically
        # and unnecessarily.  We disable sequential scans on every commit,
        # since initZopeless by default resets our database connection with
        # every new transaction.
        # MultiTableCopy disables sequential scans for the first batch; this
        # just renews our setting after the connection is reset.
        postgresql.allow_sequential_scans(cur, False)
        return cur
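
    # For reference, and as an assumption rather than a statement about the
    # postgresql module's actual implementation: allow_sequential_scans()
    # presumably boils down to a session-level planner toggle, roughly:
    #
    #     def allow_sequential_scans(cur, permission):
    #         """Tell the planner whether sequential scans are allowed."""
    #         cur.execute(
    #             "SET enable_seqscan = %s"
    #             % ('true' if permission else 'false'))
    #
    # The setting lasts only as long as the session, which is why _commit
    # has to renew it after each connection reset.
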
    def pour(self, transaction_manager):
        """Pour data from holding tables back into source tables.

        Rows in the holding table that have their new_id set to null are
        skipped.

        The ongoing transaction is committed and a new one begun after
        every batch run.

        Batch sizes are adjusted dynamically to approach the stated time
        goal per batch; a rough sketch of that mechanism follows this
        method.
        """
        if self.last_extracted_table is None:
            if not self.needsRecovery():
                raise AssertionError("Can't pour: no tables extracted")
        elif self.last_extracted_table != len(self.tables) - 1:
            raise AssertionError(
                "Not safe to pour: last table '%s' was not extracted"
                % self.tables[-1])

        cur = self._commit(transaction_manager)

        # Don't let postgres revert to slow sequential scans while we pour.
        # That might otherwise happen to the holding table as its vital "id"
        # index degrades with the removal of rows.
        postgresql.allow_sequential_scans(cur, False)

        # Main loop: for each of the source tables being copied, see if
        # there's a matching holding table.  If so, prepare it, pour it back
        # into the source table, and drop.
        for table in self.tables:
            holding_table_unquoted = self.getRawHoldingTableName(table)

            if not postgresql.have_table(cur, holding_table_unquoted):
                # We know we're in a suitable state for pouring.  If this
                # table does not exist, it must be because it's been poured
                # out completely and dropped in an earlier instance of this
                # loop, before the failure we're apparently recovering from.
                continue

            holding_table = self.getHoldingTableName(table)
            self.logger.info(
                "Pouring %s back into %s..." % (holding_table, table))

            tablestarttime = time.time()

            has_new_id = postgresql.table_has_column(
                cur, holding_table_unquoted, 'new_id')

            self._pourTable(
                holding_table, table, has_new_id, transaction_manager)

            # Drop holding table.  It may still contain rows with new_id
            # set to null.  Those must not be poured.
            postgresql.drop_tables(cursor(), holding_table)

            self.logger.debug(
                "Pouring %s took %.3f seconds."
                % (holding_table, time.time() - tablestarttime))

            cur = self._commit(transaction_manager)

        # From here on, let the database perform sequential scans again
        # wherever it decides that's best.
        postgresql.allow_sequential_scans(cur, True)
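
    # Illustrative sketch only: _pourTable is not shown here, but per the
    # docstring above, each table is poured in id-ranged batches whose size
    # is retuned after every batch to approach a time goal.  The names below
    # (seconds_per_batch, the holding table's column layout) are assumptions
    # for illustration, not this class's confirmed API:
    #
    #     def _pourTable(self, holding_table, table, has_new_id,
    #                    transaction_manager):
    #         # Glosses over column mapping and the has_new_id=False case.
    #         cur = cursor()
    #         cur.execute("SELECT min(id), max(id) FROM %s" % holding_table)
    #         lowest_id, highest_id = cur.fetchone()
    #         batch_size = 1000
    #         while lowest_id is not None and lowest_id <= highest_id:
    #             batch_start = time.time()
    #             next_id = lowest_id + batch_size
    #             # Copy one batch back, skipping rows with unset new_id.
    #             cur.execute(
    #                 "INSERT INTO %s (SELECT * FROM %s"
    #                 " WHERE id >= %d AND id < %d"
    #                 " AND new_id IS NOT NULL)"
    #                 % (table, holding_table, lowest_id, next_id))
    #             cur.execute(
    #                 "DELETE FROM %s WHERE id < %d"
    #                 % (holding_table, next_id))
    #             cur = self._commit(transaction_manager)
    #             # Retune batch size toward the per-batch time goal.
    #             elapsed = max(time.time() - batch_start, 0.001)
    #             batch_size = max(
    #                 1, int(batch_size * self.seconds_per_batch / elapsed))
    #             lowest_id = next_id
    #
    # Committing between batches (via _commit) keeps each transaction
    # short, so locks on the source table are held only briefly.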