def start(self):
  self._delta_db = IndexedDatabase(
      artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
      artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
      DB_OPEN_READ,
      )
  self._delta_db.__delitem__ = lambda id: None
  self._tree_db = IndexedDatabase(
      artifact_manager.get_temp_file(config.RCS_TREES_STORE),
      artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
      DB_OPEN_READ,
      )
  serializer = MarshalSerializer()
  if self._compress:
    serializer = CompressingSerializer(serializer)
  self._co_db = Database(
      artifact_manager.get_temp_file(config.CVS_CHECKOUT_DB),
      DB_OPEN_NEW,
      serializer,
      )

  # The set of CVSFile instances whose TextRecords have already been
  # read:
  self._loaded_files = set()

  # A map { CVSFILE : _FileTree } for files that currently have live
  # revisions:
  self._text_record_db = TextRecordDatabase(self._delta_db, self._co_db)

class InternalRevisionCollector(RevisionCollector):
  """The RevisionCollector used by InternalRevisionReader."""

  def __init__(self, compress):
    RevisionCollector.__init__(self)
    self._compress = compress

  def register_artifacts(self, which_pass):
    artifact_manager.register_temp_file(
        config.RCS_DELTAS_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_DELTAS_STORE, which_pass)
    artifact_manager.register_temp_file(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(config.RCS_TREES_STORE, which_pass)

  def start(self):
    serializer = MarshalSerializer()
    if self._compress:
      serializer = CompressingSerializer(serializer)
    self._delta_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_DELTAS_STORE),
        artifact_manager.get_temp_file(config.RCS_DELTAS_INDEX_TABLE),
        DB_OPEN_NEW,
        serializer,
        )
    primer = (FullTextRecord, DeltaTextRecord)
    self._rcs_trees = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_NEW,
        PrimedPickleSerializer(primer),
        )

  def _writeout(self, text_record, text):
    self.text_record_db.add(text_record)
    self._delta_db[text_record.id] = text

  def process_file(self, cvs_file_items):
    """Read revision information for the file described by CVS_FILE_ITEMS.

    Compute the text record refcounts, discard any records that are
    unneeded, and store the text records for the file to the
    _rcs_trees database."""

    # A map from cvs_rev_id to TextRecord instance:
    self.text_record_db = TextRecordDatabase(self._delta_db, NullDatabase())

    cvs2svn_rcsparse.parse(
        open(cvs_file_items.cvs_file.rcs_path, 'rb'),
        _Sink(self, cvs_file_items),
        )

    self.text_record_db.recompute_refcounts(cvs_file_items)
    self.text_record_db.free_unused()
    self._rcs_trees[cvs_file_items.cvs_file.id] = self.text_record_db
    del self.text_record_db

  def finish(self):
    self._delta_db.close()
    self._rcs_trees.close()

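# Illustrative driver showing the collector's call order; in cvs2svn the pass
# framework performs this wiring itself.  `cvs_file_items_iter` and
# `which_pass` are hypothetical stand-ins for objects supplied by that
# framework.

def collect_revision_text(cvs_file_items_iter, which_pass, compress=True):
  collector = InternalRevisionCollector(compress)
  collector.register_artifacts(which_pass)
  collector.start()
  try:
    for cvs_file_items in cvs_file_items_iter:
      # Parse each RCS file and store its deltas and text-record tree:
      collector.process_file(cvs_file_items)
  finally:
    collector.finish()
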
class InternalRevisionExcluder(RevisionExcluder):
  """The RevisionExcluder used by InternalRevisionReader."""

  def register_artifacts(self, which_pass):
    artifact_manager.register_temp_file_needed(
        config.RCS_TREES_STORE, which_pass
        )
    artifact_manager.register_temp_file_needed(
        config.RCS_TREES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(
        config.RCS_TREES_FILTERED_STORE, which_pass
        )
    artifact_manager.register_temp_file(
        config.RCS_TREES_FILTERED_INDEX_TABLE, which_pass
        )

  def start(self):
    self._tree_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_INDEX_TABLE),
        DB_OPEN_READ,
        )
    primer = (FullTextRecord, DeltaTextRecord)
    self._new_tree_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_STORE),
        artifact_manager.get_temp_file(config.RCS_TREES_FILTERED_INDEX_TABLE),
        DB_OPEN_NEW,
        PrimedPickleSerializer(primer),
        )

  def process_file(self, cvs_file_items):
    text_record_db = self._tree_db[cvs_file_items.cvs_file.id]
    text_record_db.recompute_refcounts(cvs_file_items)
    text_record_db.free_unused()
    self._new_tree_db[cvs_file_items.cvs_file.id] = text_record_db

  def finish(self):
    self._tree_db.close()
    self._new_tree_db.close()

def MetadataDatabase(store_filename, index_table_filename, mode):
  """A Database to store Metadata instances that describe CVSRevisions.

  This database manages a map id -> Metadata instance, where id is a
  unique identifier for the metadata."""

  return IndexedDatabase(
      store_filename, index_table_filename, mode,
      PrimedPickleSerializer((Metadata,)),
      )

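# Hypothetical usage of the factory above.  The store/index filenames are
# placeholders, and `metadata` is assumed to be a Metadata instance whose
# `id` attribute is the unique identifier mentioned in the docstring.

def save_metadata(metadata):
  db = MetadataDatabase('metadata-store.db', 'metadata-index.db', DB_OPEN_NEW)
  db[metadata.id] = metadata
  db.close()

def load_metadata(metadata_id):
  db = MetadataDatabase('metadata-store.db', 'metadata-index.db', DB_OPEN_READ)
  metadata = db[metadata_id]
  db.close()
  return metadata
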
class SVNRepositoryMirror:
  """Mirror a Subversion repository and its history.

  Mirror a Subversion repository as it is constructed, one SVNCommit
  at a time.  For each LineOfDevelopment we store a skeleton of the
  directory structure within that LOD for each SVN revision number in
  which it changed.

  The creation of a dumpfile or Subversion repository is handled by
  delegates.  See the add_delegate() method for how to set delegates.

  For each LOD that has been seen so far, an LODHistory instance is
  stored in self._lod_histories.  An LODHistory keeps track of each
  SVNRevision in which files were added to or deleted from that LOD,
  as well as the node id of the node tree describing the LOD contents
  at that SVN revision.

  The LOD trees themselves are stored in the _nodes_db database, which
  maps node ids to nodes.  A node is a map from CVSPath.id to ids of
  the corresponding subnodes.  The _nodes_db is stored on disk and
  each access is expensive.

  The _nodes_db database only holds the nodes for old revisions.  The
  revision that is being constructed is kept in memory in the
  _new_nodes map, which is cheap to access.

  You must invoke start_commit() before each SVNCommit and
  end_commit() afterwards.

  *** WARNING *** Path arguments to methods in this class MUST NOT
  have leading or trailing slashes."""

  class ParentMissingError(Exception):
    """The parent of a path is missing.

    Exception raised if an attempt is made to add a path to the
    repository mirror but the parent's path doesn't exist in the
    youngest revision of the repository."""

    pass

  class PathExistsError(Exception):
    """The path already exists in the repository.

    Exception raised if an attempt is made to add a path to the
    repository mirror and that path already exists in the youngest
    revision of the repository."""

    pass

  def register_artifacts(self, which_pass):
    """Register the artifacts that will be needed for this object."""

    artifact_manager.register_temp_file(
        config.SVN_MIRROR_NODES_INDEX_TABLE, which_pass
        )
    artifact_manager.register_temp_file(
        config.SVN_MIRROR_NODES_STORE, which_pass
        )

  def open(self):
    """Set up the SVNRepositoryMirror and prepare it for SVNCommits."""

    self._key_generator = KeyGenerator()
    self._delegates = []

    # A map from LOD to LODHistory instance for all LODs that have
    # been defined so far:
    self._lod_histories = {}

    # This corresponds to the 'nodes' table in a Subversion fs.  (We
    # don't need a 'representations' or 'strings' table because we
    # only track metadata, not file contents.)
    self._nodes_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.SVN_MIRROR_NODES_STORE),
        artifact_manager.get_temp_file(config.SVN_MIRROR_NODES_INDEX_TABLE),
        DB_OPEN_NEW,
        serializer=_NodeSerializer(),
        )

    # Start at revision 0 without a root node.  It will be created
    # by _open_writable_root_node.
    self._youngest = 0

  def start_commit(self, revnum, revprops):
    """Start a new commit."""

    self._youngest = revnum

    # A map {node_id : {CVSPath : node_id}}.
    self._new_nodes = {}

    self._invoke_delegates('start_commit', revnum, revprops)

  def end_commit(self):
    """Called at the end of each commit.

    This method copies the newly created nodes to the on-disk nodes
    db."""

    # Copy the new nodes to the _nodes_db:
    for id, value in self._new_nodes.items():
      self._nodes_db[id] = value

    del self._new_nodes

    self._invoke_delegates('end_commit')

  def _get_lod_history(self, lod):
    """Return the LODHistory instance describing LOD.

    Create a new (empty) LODHistory if it doesn't yet exist."""

    try:
      return self._lod_histories[lod]
    except KeyError:
      lod_history = LODHistory()
      self._lod_histories[lod] = lod_history
      return lod_history

  def _create_empty_node(self):
    """Create and return a new, empty, writable node."""

    new_node = _WritableMirrorNode(self, self._key_generator.gen_id(), {})
    self._new_nodes[new_node.id] = new_node.entries
    return new_node

  def _copy_node(self, old_node):
    """Create and return a new, writable node that is a copy of OLD_NODE."""

    new_node = _WritableMirrorNode(
        self, self._key_generator.gen_id(), old_node.entries.copy()
        )
    self._new_nodes[new_node.id] = new_node.entries
    return new_node

  def _get_node(self, id):
    """Return the node for id ID.

    The node might be read from either self._nodes_db or
    self._new_nodes.  Return an instance of _MirrorNode."""

    try:
      return _WritableMirrorNode(self, id, self._new_nodes[id])
    except KeyError:
      return _ReadOnlyMirrorNode(self, id, self._nodes_db[id])

  def _open_readonly_lod_node(self, lod, revnum):
    """Open a readonly node for the root path of LOD at revision REVNUM.

    Return an instance of _MirrorNode if the path exists; otherwise,
    raise KeyError."""

    lod_history = self._get_lod_history(lod)
    node_id = lod_history.get_id(revnum)
    return self._get_node(node_id)

  def _open_readonly_node(self, cvs_path, lod, revnum):
    """Open a readonly node for CVS_PATH from LOD at REVNUM.

    If cvs_path refers to a leaf node, return None.  Raise KeyError
    if the node does not exist."""

    if cvs_path.parent_directory is None:
      return self._open_readonly_lod_node(lod, revnum)
    else:
      parent_node = self._open_readonly_node(
          cvs_path.parent_directory, lod, revnum
          )
      return parent_node[cvs_path]

  def _open_writable_lod_node(self, lod, create, invoke_delegates=True):
    """Open a writable node for the root path in LOD.

    Iff CREATE is True, create the path and any missing directories.
    Return an instance of _WritableMirrorNode.  Raise KeyError if the
    path doesn't already exist and CREATE is not set."""

    lod_history = self._get_lod_history(lod)
    try:
      id = lod_history.get_id()
    except KeyError:
      if create:
        node = self._create_empty_node()
        lod_history.update(self._youngest, node.id)
        if invoke_delegates:
          self._invoke_delegates('initialize_lod', lod)
      else:
        raise
    else:
      node = self._get_node(id)
      if not isinstance(node, _WritableMirrorNode):
        # Node was created in an earlier revision, so we have to copy
        # it to make it writable:
        node = self._copy_node(node)
        lod_history.update(self._youngest, node.id)

    return node

  def _open_writable_node(self, cvs_directory, lod, create):
    """Open a writable node for CVS_DIRECTORY in LOD.

    Iff CREATE is True, create a directory node for CVS_DIRECTORY and
    any missing directories.  Return an instance of
    _WritableMirrorNode.  Raise KeyError if CVS_DIRECTORY doesn't
    exist and CREATE is not set."""

    if cvs_directory.parent_directory is None:
      return self._open_writable_lod_node(lod, create)

    parent_node = self._open_writable_node(
        cvs_directory.parent_directory, lod, create
        )
    try:
      node = parent_node[cvs_directory]
    except KeyError:
      if create:
        # The component does not exist, so we create it.
        new_node = self._create_empty_node()
        parent_node[cvs_directory] = new_node
        self._invoke_delegates('mkdir', lod, cvs_directory)
        return new_node
      else:
        raise
    else:
      if isinstance(node, _WritableMirrorNode):
        return node
      elif isinstance(node, _ReadOnlyMirrorNode):
        new_node = self._copy_node(node)
        parent_node[cvs_directory] = new_node
        return new_node
      else:
        raise InternalError(
            'Attempt to modify file at %s in mirror' % (cvs_directory,)
            )

  def delete_lod(self, lod):
    """Delete the main path for LOD from the tree.

    The path must currently exist.  Silently refuse to delete trunk
    paths."""

    if isinstance(lod, Trunk):
      # Never delete a Trunk path.
      return

    lod_history = self._get_lod_history(lod)
    if not lod_history.exists():
      raise KeyError()
    lod_history.update(self._youngest, None)
    self._invoke_delegates('delete_lod', lod)

  def delete_path(self, cvs_path, lod, should_prune=False):
    """Delete CVS_PATH from LOD."""

    if cvs_path.parent_directory is None:
      self.delete_lod(lod)
      return
    else:
      parent_node = self._open_writable_node(
          cvs_path.parent_directory, lod, False
          )
      del parent_node[cvs_path]
      self._invoke_delegates('delete_path', lod, cvs_path)

      # The following recursion makes pruning an O(n^2) operation in
      # the worst case (where n is the depth of CVS_PATH), but the
      # worst case is probably rare, and the constant cost is pretty
      # low.  Another drawback is that we issue a delete for each path
      # and not just a single delete for the topmost directory pruned.
      if should_prune and len(parent_node) == 0:
        self.delete_path(cvs_path.parent_directory, lod, True)

  def initialize_project(self, project):
    """Create the basic structure for PROJECT."""

    self._invoke_delegates('initialize_project', project)

    self._open_writable_lod_node(
        Ctx()._symbol_db.get_symbol(project.trunk_id),
        create=True, invoke_delegates=False
        )

  def change_path(self, cvs_rev):
    """Register a change in self._youngest for the CVS_REV's svn_path."""

    # We do not have to update the nodes because our mirror is only
    # concerned with the presence or absence of paths, and a file
    # content change does not cause any path changes.
    self._invoke_delegates('change_path', SVNCommitItem(cvs_rev, False))

  def add_path(self, cvs_rev):
    """Add the CVS_REV's svn_path to the repository mirror."""

    cvs_file = cvs_rev.cvs_file
    parent_node = self._open_writable_node(
        cvs_file.parent_directory, cvs_rev.lod, True
        )

    if cvs_file in parent_node:
      raise self.PathExistsError(
          'Attempt to add path \'%s\' to repository mirror '
          'when it already exists in the mirror.'
          % (cvs_rev.get_svn_path(),)
          )

    parent_node[cvs_file] = None

    self._invoke_delegates('add_path', SVNCommitItem(cvs_rev, True))

  def copy_lod(self, src_lod, dest_lod, src_revnum):
    """Copy all of SRC_LOD at SRC_REVNUM to DEST_LOD.

    In the youngest revision of the repository, the destination LOD
    *must not* already exist.

    Return the new node at DEST_LOD.  Note that this node is not
    necessarily writable, though its parent node necessarily is."""

    dest_path = dest_lod.get_path()

    # Get the node of our src_path:
    src_node = self._open_readonly_lod_node(src_lod, src_revnum)

    dest_lod_history = self._get_lod_history(dest_lod)
    if dest_lod_history.exists():
      raise self.PathExistsError(
          "Attempt to add path '%s' to repository mirror "
          "when it already exists in the mirror." % dest_path
          )

    dest_lod_history.update(self._youngest, src_node.id)

    self._invoke_delegates('copy_lod', src_lod, dest_lod, src_revnum)

    # This is a cheap copy, so src_node has the same contents as the
    # new destination node.
    return src_node

  def copy_path(
        self, cvs_path, src_lod, dest_lod, src_revnum, create_parent=False
        ):
    """Copy CVS_PATH from SRC_LOD at SRC_REVNUM to DEST_LOD.

    In the youngest revision of the repository, the destination's
    parent *must* exist unless CREATE_PARENT is specified.  But the
    destination itself *must not* exist.

    Return the new node at (CVS_PATH, DEST_LOD).  Note that this node
    is not necessarily writable, though its parent node necessarily
    is."""

    if cvs_path.parent_directory is None:
      return self.copy_lod(src_lod, dest_lod, src_revnum)

    # Get the node of our source, or None if it is a file:
    src_node = self._open_readonly_node(cvs_path, src_lod, src_revnum)

    # Get the parent path of the destination:
    try:
      dest_parent_node = self._open_writable_node(
          cvs_path.parent_directory, dest_lod, create_parent
          )
    except KeyError:
      raise self.ParentMissingError(
          'Attempt to add path \'%s\' to repository mirror, '
          'but its parent directory doesn\'t exist in the mirror.'
          % (dest_lod.get_path(cvs_path.cvs_path),)
          )

    if cvs_path in dest_parent_node:
      raise self.PathExistsError(
          'Attempt to add path \'%s\' to repository mirror '
          'when it already exists in the mirror.'
          % (dest_lod.get_path(cvs_path.cvs_path),)
          )

    dest_parent_node[cvs_path] = src_node

    self._invoke_delegates(
        'copy_path',
        src_lod.get_path(cvs_path.cvs_path),
        dest_lod.get_path(cvs_path.cvs_path),
        src_revnum
        )

    # This is a cheap copy, so src_node has the same contents as the
    # new destination node.
    return src_node

  def fill_symbol(self, svn_symbol_commit, fill_source):
    """Perform all copies for the CVSSymbols in SVN_SYMBOL_COMMIT.

    The symbolic name is guaranteed to exist in the Subversion
    repository by the end of this call, even if there are no paths
    under it."""

    symbol = svn_symbol_commit.symbol

    try:
      dest_node = self._open_writable_lod_node(symbol, False)
    except KeyError:
      dest_node = None

    self._fill_directory(symbol, dest_node, fill_source, None)

  def _prune_extra_entries(
        self, dest_cvs_path, symbol, dest_node, src_entries
        ):
    """Delete any entries in DEST_NODE that are not in SRC_ENTRIES.

    This might require creating a new writable node, so return a
    possibly-modified dest_node."""

    delete_list = [
        cvs_path
        for cvs_path in dest_node
        if cvs_path not in src_entries
        ]
    if delete_list:
      if not isinstance(dest_node, _WritableMirrorNode):
        dest_node = self._open_writable_node(dest_cvs_path, symbol, False)
      # Sort the delete list so that the output is in a consistent
      # order:
      delete_list.sort()
      for cvs_path in delete_list:
        del dest_node[cvs_path]
        self._invoke_delegates('delete_path', symbol, cvs_path)

    return dest_node

  def _fill_directory(self, symbol, dest_node, fill_source, parent_source):
    """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.

    Use items from FILL_SOURCE, and recurse into the child items.

    Fill SYMBOL starting at the path FILL_SOURCE.cvs_path.  DEST_NODE
    is the node of this destination path, or None if the destination
    does not yet exist.  All directories above this path have already
    been filled.  FILL_SOURCE is a FillSource instance describing the
    items within a subtree of the repository that still need to be
    copied to the destination.

    PARENT_SOURCE is the SVNRevisionRange that was used to copy the
    parent directory, if it was copied in this commit.  We prefer to
    copy from the same source as was used for the parent, since it
    typically requires less touching-up.  If PARENT_SOURCE is None,
    then the parent directory was not copied in this commit, so no
    revision is preferable to any other."""

    copy_source = fill_source.compute_best_source(parent_source)

    # Figure out if we shall copy to this destination and delete any
    # destination path that is in the way.
    if dest_node is None:
      # The destination does not exist at all, so it definitely has to
      # be copied:
      dest_node = self.copy_path(
          fill_source.cvs_path,
          copy_source.source_lod, symbol, copy_source.opening_revnum
          )
    elif (parent_source is not None) and (
          copy_source.source_lod != parent_source.source_lod
          or copy_source.opening_revnum != parent_source.opening_revnum
          ):
      # The parent path was copied from a different source than we
      # need to use, so we have to delete the version that was copied
      # with the parent then re-copy from the correct source:
      self.delete_path(fill_source.cvs_path, symbol)
      dest_node = self.copy_path(
          fill_source.cvs_path,
          copy_source.source_lod, symbol, copy_source.opening_revnum
          )
    else:
      copy_source = parent_source

    # Get the map {entry : FillSource} for entries within this
    # directory that need filling.
    src_entries = {}
    for (cvs_path, fill_subsource) in fill_source.get_subsources():
      src_entries[cvs_path] = fill_subsource

    if copy_source is not None:
      dest_node = self._prune_extra_entries(
          fill_source.cvs_path, symbol, dest_node, src_entries
          )

    # Recurse into the SRC_ENTRIES ids sorted in alphabetical order.
    cvs_paths = src_entries.keys()
    cvs_paths.sort()
    for cvs_path in cvs_paths:
      if isinstance(cvs_path, CVSDirectory):
        # Path is a CVSDirectory:
        try:
          dest_subnode = dest_node[cvs_path]
        except KeyError:
          # Path didn't exist at all; it has to be created:
          dest_subnode = None
        self._fill_directory(
            symbol, dest_subnode, src_entries[cvs_path], copy_source
            )
      else:
        # Path is a CVSFile:
        self._fill_file(
            symbol, cvs_path in dest_node, src_entries[cvs_path], copy_source
            )

  def _fill_file(self, symbol, dest_existed, fill_source, parent_source):
    """Fill the tag or branch SYMBOL at the path indicated by FILL_SOURCE.

    Use items from FILL_SOURCE.

    Fill SYMBOL at path FILL_SOURCE.cvs_path.  DEST_EXISTED indicates
    whether the destination path already exists.  All directories
    above this path have already been filled as needed.  FILL_SOURCE
    is a FillSource instance describing the item that needs to be
    copied to the destination.

    PARENT_SOURCE is the source from which the parent directory was
    copied, or None if the parent directory was not copied during this
    commit.  We prefer to copy from PARENT_SOURCE, since it typically
    requires less touching-up.  If PARENT_SOURCE is None, then the
    parent directory was not copied in this commit, so no revision is
    preferable to any other."""

    copy_source = fill_source.compute_best_source(parent_source)

    # Figure out if we shall copy to this destination and delete any
    # destination path that is in the way.
    if not dest_existed:
      # The destination does not exist at all, so it definitely has to
      # be copied:
      self.copy_path(
          fill_source.cvs_path,
          copy_source.source_lod, symbol, copy_source.opening_revnum
          )
    elif (parent_source is not None) and (
          copy_source.source_lod != parent_source.source_lod
          or copy_source.opening_revnum != parent_source.opening_revnum
          ):
      # The parent path was copied from a different source than we
      # need to use, so we have to delete the version that was copied
      # with the parent and then re-copy from the correct source:
      self.delete_path(fill_source.cvs_path, symbol)
      self.copy_path(
          fill_source.cvs_path,
          copy_source.source_lod, symbol, copy_source.opening_revnum
          )

  def add_delegate(self, delegate):
    """Add DELEGATE to self._delegates.

    For every delegate you add, as soon as SVNRepositoryMirror
    performs a repository action method, SVNRepositoryMirror will call
    the delegate's corresponding repository action method.  Multiple
    delegates will be called in the order that they are added.  See
    SVNRepositoryMirrorDelegate for more information."""

    self._delegates.append(delegate)

  def _invoke_delegates(self, method, *args):
    """Invoke a method on each delegate.

    Iterate through each of our delegates, in the order that they were
    added, and call the delegate's method named METHOD with the
    arguments in ARGS."""

    for delegate in self._delegates:
      getattr(delegate, method)(*args)

  def close(self):
    """Call the delegate finish methods and close databases."""

    self._invoke_delegates('finish')
    self._lod_histories = None
    self._nodes_db.close()
    self._nodes_db = None

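# The class above dispatches every repository action to its delegates purely
# by method name (see _invoke_delegates(), which uses getattr()).  The sketch
# below is an illustrative tracing delegate plus hypothetical wiring code; it
# is not the real SVNRepositoryMirrorDelegate base class, and the revision
# properties shown are made-up placeholders.

class TracingDelegate:
  """Print every repository action that the mirror performs.

  Because SVNRepositoryMirror._invoke_delegates() looks callbacks up
  with getattr(), a catch-all __getattr__ is enough for a debugging
  delegate like this one."""

  def __getattr__(self, method_name):
    def handler(*args):
      print 'mirror action: %s%r' % (method_name, args)
    return handler


def trace_one_commit(mirror, revnum, revprops):
  # Hypothetical usage: wrap one commit in the required start/end calls.
  mirror.add_delegate(TracingDelegate())
  mirror.start_commit(revnum, revprops)
  # ... add_path() / change_path() / delete_path() / copy_lod() calls here ...
  mirror.end_commit()
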
class PersistenceManager:
  """The PersistenceManager allows us to effectively store SVNCommits
  to disk and retrieve them later using only their subversion revision
  number as the key.  It also returns the subversion revision number
  for a given CVSRevision's unique key.

  All information pertinent to each SVNCommit is stored in a series of
  on-disk databases so that SVNCommits can be retrieved on-demand.

  MODE is one of the constants DB_OPEN_NEW or DB_OPEN_READ.  In 'new'
  mode, PersistenceManager will initialize a new set of on-disk
  databases and be fully-featured.  In 'read' mode, PersistenceManager
  will open existing on-disk databases and the set_* methods will be
  unavailable."""

  def __init__(self, mode):
    self.mode = mode
    if mode not in (DB_OPEN_NEW, DB_OPEN_READ):
      raise RuntimeError("Invalid 'mode' argument to PersistenceManager")

    primer = (
        SVNInitialProjectCommit,
        SVNPrimaryCommit,
        SVNPostCommit,
        SVNBranchCommit,
        SVNTagCommit,
        )
    serializer = PrimedPickleSerializer(primer)

    self.svn_commit_db = IndexedDatabase(
        artifact_manager.get_temp_file(config.SVN_COMMITS_INDEX_TABLE),
        artifact_manager.get_temp_file(config.SVN_COMMITS_STORE),
        mode, serializer,
        )

    self.cvs2svn_db = RecordTable(
        artifact_manager.get_temp_file(config.CVS_REVS_TO_SVN_REVNUMS),
        mode, SignedIntegerPacker(SVN_INVALID_REVNUM),
        )

  def get_svn_revnum(self, cvs_rev_id):
    """Return the Subversion revision number in which CVS_REV_ID was
    committed, or SVN_INVALID_REVNUM if there is no mapping for
    CVS_REV_ID."""

    return self.cvs2svn_db.get(cvs_rev_id, SVN_INVALID_REVNUM)

  def get_svn_commit(self, svn_revnum):
    """Return an SVNCommit that corresponds to SVN_REVNUM.

    If no SVNCommit exists for revnum SVN_REVNUM, then return None."""

    return self.svn_commit_db.get(svn_revnum, None)

  def put_svn_commit(self, svn_commit):
    """Record the bidirectional mapping between SVN_REVNUM and
    CVS_REVS and record associated attributes."""

    if self.mode == DB_OPEN_READ:
      raise RuntimeError(
          'Write operation attempted on read-only PersistenceManager'
          )

    self.svn_commit_db[svn_commit.revnum] = svn_commit

    if isinstance(svn_commit, SVNRevisionCommit):
      for cvs_rev in svn_commit.cvs_revs:
        self.cvs2svn_db[cvs_rev.id] = svn_commit.revnum

  def close(self):
    self.cvs2svn_db.close()
    self.cvs2svn_db = None
    self.svn_commit_db.close()
    self.svn_commit_db = None

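# A minimal sketch of the two modes described in the docstring above.  The
# svn_commit argument is assumed to be an already-constructed SVNCommit (for
# the cvs2svn_db mapping to be populated it must also be an
# SVNRevisionCommit); the temp-file setup normally done by the pass framework
# is taken for granted here.

def store_and_reload(svn_commit):
  # Write side (DB_OPEN_NEW): record the commit and its CVSRevision ids.
  pm = PersistenceManager(DB_OPEN_NEW)
  pm.put_svn_commit(svn_commit)
  pm.close()

  # Read side (DB_OPEN_READ): everything is looked up by SVN revision number.
  pm = PersistenceManager(DB_OPEN_READ)
  reloaded = pm.get_svn_commit(svn_commit.revnum)
  pm.close()
  return reloaded
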
class _NodeDatabase(object):
  """A database storing all of the directory nodes.

  The nodes are written in groups every time write_new_nodes() is
  called.  To the database is written a dictionary {node_id :
  [(cvs_path.id, node_id),...]}, where the keys are the node_ids of
  the new nodes.  When a node is read, its whole group is read and
  cached under the assumption that the other nodes in the group are
  likely to be needed soon.  The cache is retained across revisions
  and cleared when _cache_max_size is exceeded.

  The dictionaries for nodes that have been read from the database
  during the current revision are cached by node_id in the _cache
  member variable.  The corresponding dictionaries are *not* copied
  when read.  To avoid cross-talk between distinct MirrorDirectory
  instances that have the same node_id, users of these dictionaries
  have to copy them before modification."""

  # How many entries should be allowed in the cache for each
  # CVSDirectory in the repository.  (This number is very roughly the
  # number of complete lines of development that can be stored in the
  # cache at one time.)
  CACHE_SIZE_MULTIPLIER = 5

  # But the cache will never be limited to less than this number:
  MIN_CACHE_LIMIT = 5000

  def __init__(self):
    self.cvs_file_db = Ctx()._cvs_file_db
    self.db = IndexedDatabase(
        artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
        artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
        DB_OPEN_NEW,
        serializer=MarshalSerializer(),
        )

    # A list of the maximum node_id stored by each call to
    # write_new_nodes():
    self._max_node_ids = [0]

    # A map {node_id : {cvs_path : node_id}}:
    self._cache = {}

    # The number of directories in the repository:
    num_dirs = len([
        cvs_path
        for cvs_path in self.cvs_file_db.itervalues()
        if isinstance(cvs_path, CVSDirectory)
        ])

    self._cache_max_size = max(
        int(self.CACHE_SIZE_MULTIPLIER * num_dirs),
        self.MIN_CACHE_LIMIT,
        )

  def _load(self, items):
    retval = {}
    for (id, value) in items:
      retval[self.cvs_file_db.get_file(id)] = value
    return retval

  def _dump(self, node):
    return [
        (cvs_path.id, value)
        for (cvs_path, value) in node.iteritems()
        ]

  def _determine_index(self, id):
    """Return the index of the record holding the node with ID."""

    return bisect.bisect_left(self._max_node_ids, id)

  def __getitem__(self, id):
    try:
      items = self._cache[id]
    except KeyError:
      index = self._determine_index(id)
      for (node_id, items) in self.db[index].items():
        self._cache[node_id] = self._load(items)
      items = self._cache[id]

    return items

  def write_new_nodes(self, nodes):
    """Write NODES to the database.

    NODES is an iterable of writable CurrentMirrorDirectory instances."""

    if len(self._cache) > self._cache_max_size:
      # The size of the cache has exceeded the threshold.  Discard the
      # old cache values (but still store the new nodes into the
      # cache):
      Log().debug('Clearing node cache')
      self._cache.clear()

    data = {}
    max_node_id = 0
    for node in nodes:
      max_node_id = max(max_node_id, node.id)
      data[node.id] = self._dump(node._entries)
      self._cache[node.id] = node._entries

    self.db[len(self._max_node_ids)] = data

    if max_node_id == 0:
      # Rewrite last value:
      self._max_node_ids.append(self._max_node_ids[-1])
    else:
      self._max_node_ids.append(max_node_id)

  def close(self):
    self._cache.clear()
    self.db.close()
    self.db = None

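# The _max_node_ids / bisect_left bookkeeping above is what maps a node id to
# the database record (group) that contains it.  A small self-contained
# illustration with made-up ids:

import bisect

# After three calls to write_new_nodes() whose stored nodes had maximum ids
# 17, 42 and (no new nodes, so the previous value is repeated) 42:
max_node_ids = [0, 17, 42, 42]

def determine_index(node_id):
  # Same computation as _NodeDatabase._determine_index():
  return bisect.bisect_left(max_node_ids, node_id)

assert determine_index(5) == 1    # ids 1..17 were stored by the first call,
assert determine_index(17) == 1   # which wrote database record 1
assert determine_index(18) == 2   # ids 18..42 live in record 2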