def insertBranchRevisions(self, bzr_branch, revids_to_insert):
    """Insert a batch of BranchRevision rows.

    :param bzr_branch: Not read by this method; kept in the signature so
        existing callers keep working.
    :param revids_to_insert: A mapping whose `items()` pairs are passed,
        in chunks of at most 10000, to
        `self.db_branch.createBranchRevisionFromIDs`. (Presumably the
        pairs are (revision id, sequence number) -- confirm against
        planDatabaseChanges.)
    """
    self.logger.info(
        "Inserting %d branchrevision records.", len(revids_to_insert))
    # Materialize the pairs as a real list before chunking.  On Python 3
    # `dict.items()` returns a view, which supports neither slicing nor
    # indexing, and syncBranch likewise hands iter_list_chunks an explicit
    # list.  On Python 2 this is just a cheap copy.
    revid_seq_pairs = list(revids_to_insert.items())
    for revid_seq_pair_chunk in iter_list_chunks(revid_seq_pairs, 10000):
        self.db_branch.createBranchRevisionFromIDs(revid_seq_pair_chunk)
def syncBranch(self, bzr_branch):
    """Bring the database's view of a branch in line with its Bazaar data.

    `bzr_branch` must be read locked.

    The sync touches several tables:

    * Revision: every revision in the branch ancestry needs a Revision
      row. When a revision newly added to the branch already has a row,
      that row must be checked for consistency.

    * BranchRevision: every revision in the branch ancestry needs a
      BranchRevision row. Rows for history revisions that have become
      merged revisions must be changed accordingly.

    * Branch: once the sync is complete, the branch-scanner status
      information must be updated.
    """
    self.logger.info("Scanning branch: %s", self.db_branch.unique_name)
    self.logger.info(" from %s", bzr_branch.base)

    # Read the history out of Bazaar before doing anything else, so that
    # a problem with the branch makes us fail early.
    self.logger.info("Retrieving history from bzrlib.")
    tip_history = branch_revision_history(bzr_branch)

    # Only the branch-scanner writes to BranchRevision, Revision and
    # RevisionParent, so there is no write-lock contention on them.  They
    # are all updated in one transaction, which improves performance and
    # leaves room for garbage collection in the future.
    db_ancestry, db_history = self.retrieveDatabaseAncestry()
    plan = self.planDatabaseChanges(
        bzr_branch, tip_history, db_ancestry, db_history)
    new_ancestry, stale_branchrevisions, revids_to_insert = plan
    already_stored = getUtility(IRevisionSet).onlyPresent(new_ancestry)
    missing_revids = new_ancestry - already_stored
    self.logger.info("Adding %s new revisions.", len(missing_revids))
    for revid_chunk in iter_list_chunks(list(missing_revids), 10000):
        bzr_revisions = self.getBazaarRevisions(bzr_branch, revid_chunk)
        self.syncRevisions(bzr_branch, bzr_revisions, revids_to_insert)
    self.deleteBranchRevisions(stale_branchrevisions)
    self.insertBranchRevisions(bzr_branch, revids_to_insert)
    transaction.commit()

    # Bring the RevisionCache for this branch up to date.
    getUtility(IRevisionSet).updateRevisionCacheForBranch(self.db_branch)
    transaction.commit()

    # Tell listeners that the branch tip has changed.  This happens
    # before the database branch itself has been updated.
    is_first_scan = len(db_history) == 0
    tip_changed = events.TipChanged(
        self.db_branch, bzr_branch, is_first_scan)
    notify(tip_changed)

    # Other systems, the web UI among them, also modify the Branch table,
    # so it is updated in a short transaction of its own to avoid causing
    # timeouts in the webapp.  That leaves a small race window in which
    # the revision data is already in the database but the Branch table
    # is not yet updated.  The race is acceptable: it has no ill-effect
    # and can only err on the pessimistic side (the user is told the data
    # has not been updated yet although it has).
    self.updateBranchStatus(tip_history)
    scan_completed = events.ScanCompleted(
        self.db_branch, bzr_branch, self.logger, new_ancestry)
    notify(scan_completed)
    transaction.commit()
def syncBranch(self, bzr_branch):
    """Update the database so it matches what Bazaar says about a branch.

    The caller must hold a read lock on `bzr_branch`.

    Tables that get updated:

    * Revision: one row is required per revision in the branch ancestry.
      A pre-existing row for a revision that was only just added to the
      branch has to be checked for consistency.

    * BranchRevision: one row is required per revision in the branch
      ancestry. Where history revisions turned into merged revisions,
      their rows have to be changed.

    * Branch: the branch-scanner status information is updated at the
      end of the sync.
    """
    branch_name = self.db_branch.unique_name
    self.logger.info("Scanning branch: %s", branch_name)
    branch_location = bzr_branch.base
    self.logger.info(" from %s", branch_location)

    # Fetch history from the branch up front: if the branch is broken we
    # want to find out before any database work is done.
    self.logger.info("Retrieving history from bzrlib.")
    history_from_bzr = branch_revision_history(bzr_branch)

    # BranchRevision, Revision and RevisionParent have a single writer
    # (the branch-scanner) and so see no write-lock contention.  Updating
    # all of them inside one transaction is faster and allows for garbage
    # collection later on.
    ancestry_in_db, history_in_db = self.retrieveDatabaseAncestry()
    new_ancestry, rows_to_delete, revids_to_insert = (
        self.planDatabaseChanges(
            bzr_branch, history_from_bzr, ancestry_in_db, history_in_db))
    revids_to_add = (
        new_ancestry - getUtility(IRevisionSet).onlyPresent(new_ancestry))
    revision_count = len(revids_to_add)
    self.logger.info("Adding %s new revisions.", revision_count)
    revid_list = list(revids_to_add)
    for batch in iter_list_chunks(revid_list, 10000):
        self.syncRevisions(
            bzr_branch,
            self.getBazaarRevisions(bzr_branch, batch),
            revids_to_insert)
    self.deleteBranchRevisions(rows_to_delete)
    self.insertBranchRevisions(bzr_branch, revids_to_insert)
    transaction.commit()

    # Refresh this branch's RevisionCache.
    getUtility(IRevisionSet).updateRevisionCacheForBranch(self.db_branch)
    transaction.commit()

    # Listeners hear about the tip change at this point, i.e. while the
    # database branch has not actually been updated yet.
    no_prior_history = (len(history_in_db) == 0)
    notify(events.TipChanged(self.db_branch, bzr_branch, no_prior_history))

    # The Branch table is shared with other writers such as the web UI.
    # Keeping its update in a short, separate transaction avoids webapp
    # timeouts, at the price of a brief window where the revision data is
    # updated but the Branch table is not.  That window is harmless: the
    # worst case is pessimistic (reporting data as not yet updated when
    # it already is), so the race is accepted.
    self.updateBranchStatus(history_from_bzr)
    completion_event = events.ScanCompleted(
        self.db_branch, bzr_branch, self.logger, new_ancestry)
    notify(completion_event)
    transaction.commit()