class _NodeSerializer(MarshalSerializer):
  """Serialize/deserialize the contents of directory nodes.

  A node is a dictionary mapping CVSPath instances to values.  CVSPath
  instances cannot be marshalled directly, so each key is replaced by
  its integer id on dump and looked up again in Ctx()._cvs_file_db on
  load."""

  def __init__(self):
    self.cvs_file_db = Ctx()._cvs_file_db

  def _dump(self, node):
    return [
        (cvs_path.id, value)
        for (cvs_path, value) in node.iteritems()
        ]

  def dumpf(self, f, node):
    MarshalSerializer.dumpf(self, f, self._dump(node))

  def dumps(self, node):
    return MarshalSerializer.dumps(self, self._dump(node))

  def _load(self, items):
    retval = {}
    for (id, value) in items:
      retval[self.cvs_file_db.get_file(id)] = value
    return retval

  def loadf(self, f):
    return self._load(MarshalSerializer.loadf(self, f))

  def loads(self, s):
    return self._load(MarshalSerializer.loads(self, s))
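# A minimal, self-contained sketch (not part of cvs2svn) of the id-swapping
# idea used by _NodeSerializer above: marshal cannot serialize CVSPath
# objects, so keys are dumped as integer ids and resolved back through the
# file database on load.  FakePath and FakeFileDB are hypothetical stand-ins
# for CVSPath and Ctx()._cvs_file_db.

import marshal


class FakePath(object):
  """Hypothetical stand-in for a CVSPath; only the integer id matters."""

  def __init__(self, id, name):
    self.id = id
    self.name = name


class FakeFileDB(object):
  """Hypothetical stand-in for Ctx()._cvs_file_db."""

  def __init__(self, paths):
    self._paths = dict((p.id, p) for p in paths)

  def get_file(self, id):
    return self._paths[id]


def dump_node(node):
  # Replace the unmarshalable path keys by their integer ids:
  return marshal.dumps([(path.id, value) for (path, value) in node.items()])


def load_node(s, file_db):
  # Re-attach the path objects by looking their ids up in the database:
  return dict(
      (file_db.get_file(id), value) for (id, value) in marshal.loads(s))


_paths = [FakePath(1, 'foo,v'), FakePath(2, 'subdir')]
_db = FakeFileDB(_paths)
_node = {_paths[0]: 10, _paths[1]: 11}
assert load_node(dump_node(_node), _db) == _node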
class _NodeDatabase(object):
  """A database storing all of the directory nodes.

  The nodes are written in groups every time write_new_nodes() is
  called.  To the database is written a dictionary {node_id :
  [(cvs_path.id, node_id),...]}, where the keys are the node_ids of
  the new nodes.  When a node is read, its whole group is read and
  cached under the assumption that the other nodes in the group are
  likely to be needed soon.  The cache is retained across revisions
  and cleared when _cache_max_size is exceeded.

  The dictionaries for nodes that have been read from the database
  during the current revision are cached by node_id in the _cache
  member variable.  The corresponding dictionaries are *not* copied
  when read.  To avoid cross-talk between distinct MirrorDirectory
  instances that have the same node_id, users of these dictionaries
  have to copy them before modification."""

  # How many entries should be allowed in the cache for each
  # CVSDirectory in the repository.  (This number is very roughly the
  # number of complete lines of development that can be stored in the
  # cache at one time.)
  CACHE_SIZE_MULTIPLIER = 5

  # But the cache will never be limited to less than this number:
  MIN_CACHE_LIMIT = 5000

  def __init__(self):
    self.cvs_file_db = Ctx()._cvs_file_db
    self.db = IndexedDatabase(
        artifact_manager.get_temp_file(config.MIRROR_NODES_STORE),
        artifact_manager.get_temp_file(config.MIRROR_NODES_INDEX_TABLE),
        DB_OPEN_NEW, serializer=MarshalSerializer(),
        )

    # A list of the maximum node_id stored by each call to
    # write_new_nodes():
    self._max_node_ids = [0]

    # A map {node_id : {cvs_path : node_id}}:
    self._cache = {}

    # The number of directories in the repository:
    num_dirs = len([
        cvs_path
        for cvs_path in self.cvs_file_db.itervalues()
        if isinstance(cvs_path, CVSDirectory)
        ])

    self._cache_max_size = max(
        int(self.CACHE_SIZE_MULTIPLIER * num_dirs),
        self.MIN_CACHE_LIMIT,
        )

  def _load(self, items):
    retval = {}
    for (id, value) in items:
      retval[self.cvs_file_db.get_file(id)] = value
    return retval

  def _dump(self, node):
    return [
        (cvs_path.id, value)
        for (cvs_path, value) in node.iteritems()
        ]

  def _determine_index(self, id):
    """Return the index of the record holding the node with ID."""

    return bisect.bisect_left(self._max_node_ids, id)

  def __getitem__(self, id):
    try:
      items = self._cache[id]
    except KeyError:
      index = self._determine_index(id)
      for (node_id, items) in self.db[index].items():
        self._cache[node_id] = self._load(items)
      items = self._cache[id]

    return items

  def write_new_nodes(self, nodes):
    """Write NODES to the database.

    NODES is an iterable of writable CurrentMirrorDirectory
    instances."""

    if len(self._cache) > self._cache_max_size:
      # The size of the cache has exceeded the threshold.  Discard the
      # old cache values (but still store the new nodes into the
      # cache):
      Log().debug('Clearing node cache')
      self._cache.clear()

    data = {}
    max_node_id = 0
    for node in nodes:
      max_node_id = max(max_node_id, node.id)
      data[node.id] = self._dump(node._entries)
      self._cache[node.id] = node._entries

    self.db[len(self._max_node_ids)] = data

    if max_node_id == 0:
      # No new nodes were written; repeat the previous maximum so that
      # _determine_index() never points into this empty group:
      self._max_node_ids.append(self._max_node_ids[-1])
    else:
      self._max_node_ids.append(max_node_id)

  def close(self):
    self._cache.clear()
    self.db.close()
    self.db = None
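# A small sketch of the bisect trick behind _NodeDatabase._determine_index().
# Each call to write_new_nodes() appends the largest node id it stored, so
# _max_node_ids stays sorted and bisect_left maps any node id to the record
# (group) that contains it.  The ids below are made up for illustration.

import bisect

# _max_node_ids starts as [0]; suppose three groups were written whose
# largest node ids were 17, 42 and 99:
_max_node_ids_example = [0, 17, 42, 99]


def _determine_index_example(id):
  return bisect.bisect_left(_max_node_ids_example, id)


assert _determine_index_example(1) == 1    # first group (stored as db record 1)
assert _determine_index_example(17) == 1   # group maxima are inclusive
assert _determine_index_example(18) == 2   # second group
assert _determine_index_example(99) == 3   # third group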