import logging
from weakref import WeakValueDictionary


class LockManager(object):
    """Thread-safe lock manager."""

    def __init__(self):
        self._lock = _get_lock()
        self._lock_store = WeakValueDictionary()

    def get_lock(self, lockname, reentrant=False):
        """Create or return an existing lock identified by lockname."""
        with self._lock:
            try:
                lock = self._lock_store[lockname]
                logging.debug("LockManager.get_lock: existing lock: %s, %s",
                              lockname, lock)
            except KeyError:
                lock = _get_lock(reentrant)
                self._lock_store[lockname] = lock
                logging.debug("LockManager.get_lock: new lock: %s, %s",
                              lockname, lock)
            logging.debug("LockManager existing locks in store: %s",
                          self._list_locks())
            return lock

    def _list_locks(self):
        # Materialize the view so callers get a stable snapshot.
        return list(self._lock_store.keys())

    def list_locks(self):
        """Return a list of existing lock names in the lock store."""
        with self._lock:
            return self._list_locks()
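# LockManager assumes a module-level _get_lock helper that is not shown
# above. A minimal sketch of what it might look like: raw _thread.lock
# objects cannot be weak-referenced, so this hypothetical helper hands out a
# thin wrapper that can live in the WeakValueDictionary.

import threading


class _Lock(object):
    """Context-manager wrapper so locks can sit in a WeakValueDictionary."""

    def __init__(self, reentrant=False):
        self._lock = threading.RLock() if reentrant else threading.Lock()

    def __enter__(self):
        self._lock.acquire()
        return self

    def __exit__(self, *exc):
        self._lock.release()


def _get_lock(reentrant=False):
    return _Lock(reentrant)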
from weakref import WeakValueDictionary


def test_weak_value_dictionary():
    class Person:
        def __init__(self, name):
            self.name = name

        def __repr__(self):
            return self.name

    a, b, c = Person('Alice'), Person('Bob'), Person('Charlie')
    wvd = WeakValueDictionary()
    wvd[str(a)] = a
    wvd[str(b)] = b
    wvd[str(c)] = c
    assert sorted(wvd.keys()) == sorted(['Alice', 'Bob', 'Charlie'])
    del a  # drop the last strong reference to Alice
    assert sorted(wvd.keys()) == sorted(['Bob', 'Charlie'])
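# The test above relies on CPython's reference counting: `del a` drops the
# last strong reference and the weak entry disappears immediately. On
# interpreters without refcounting (e.g. PyPy) a portable variant must force
# a collection first -- a minimal sketch:

import gc
from weakref import WeakValueDictionary


def test_weak_value_dictionary_portable():
    class Obj:
        pass

    wvd = WeakValueDictionary()
    o = Obj()
    wvd['k'] = o
    del o          # drop the last strong reference
    gc.collect()   # required where collection is deferred
    assert 'k' not in wvd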
# DCLAY_PROPERTY_PREFIX comes from the surrounding module; self.logger is a
# logger that also provides a trace() level.
import logging
from weakref import WeakValueDictionary


def __nullify_object(self, dc_object):
    """
    @postcondition: Set all fields to None to allow GC action
    """
    metaclass = dc_object.get_class_extradata()
    self.logger.debug("[==GC==] Going to clean object %s",
                      dc_object.get_object_id())

    # This is the critical path, so a single isEnabledFor check is
    # preferable to checking it for each element.
    if self.logger.isEnabledFor(logging.DEBUG):
        held_objects = WeakValueDictionary()
        o = None
        prop_name_list = metaclass.properties.keys()
        self.logger.debug(
            "The following attributes will be nullified from object %s: %s",
            dc_object.get_object_id(), ", ".join(prop_name_list))
        for prop_name in prop_name_list:
            real_prop_name = "%s%s" % (DCLAY_PROPERTY_PREFIX, prop_name)
            try:
                o = object.__getattribute__(dc_object, real_prop_name)
                held_objects[prop_name] = o
            except TypeError:
                # Some objects cannot be weak-referenced, but we can
                # typically ignore them.
                self.logger.trace("Ignoring attribute %s of type %s",
                                  prop_name, type(o))
        # Ensure we don't keep that as a dangling active backref.
        del o

    # Critical path, keep it short!
    for prop_name in metaclass.properties.keys():
        real_prop_name = "%s%s" % (DCLAY_PROPERTY_PREFIX, prop_name)
        object.__setattr__(dc_object, real_prop_name, None)

    if self.logger.isEnabledFor(logging.DEBUG):
        # held_objects is defined when DEBUG is enabled (see above).
        held_attr_names = held_objects.keys()
        if held_attr_names:
            self.logger.debug(
                "The following attributes of object %s still have a backref active: %s",
                dc_object.get_object_id(), ", ".join(held_attr_names))
        else:
            self.logger.debug(
                "The garbage collector seems to have cleaned all the nullified attributes on %s",
                dc_object.get_object_id())
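# The stash-weak-refs / nullify / inspect-survivors pattern used above is a
# handy GC diagnostic on its own. A standalone sketch (names are
# illustrative, not from the original):

import gc
from weakref import WeakValueDictionary


def nullify_and_report(obj, attr_names):
    """Set each attribute to None and report which old values stayed alive."""
    held = WeakValueDictionary()
    for name in attr_names:
        try:
            held[name] = getattr(obj, name)   # weak ref only, no strong backref
        except TypeError:
            pass                              # value not weak-referenceable
        setattr(obj, name, None)
    gc.collect()
    return [name for name in attr_names if name in held]  # referenced elsewhere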
from threading import Lock, local
from weakref import WeakValueDictionary


class ThreadLocalEntityCache(local):

    def __init__(self):
        self.lock = Lock()
        self._dict = WeakValueDictionary()

    def __contains__(self, key):
        return key in self._dict

    def __getitem__(self, key):
        return self._dict[key]

    def get(self, key, default=None):
        return self._dict.get(key, default)

    def clear(self):
        self._dict.clear()

    def keys(self):
        return self._dict.keys()

    def update(self, key, value):
        """Extract, insert or remove a value for a given key."""
        with self.lock:
            if value is None:
                # remove
                try:
                    del self._dict[key]
                except KeyError:
                    pass
                else:
                    return None
            elif callable(value):
                try:
                    # extract
                    return self._dict[key]
                except KeyError:
                    # construct and insert
                    new_value = value()
                    self._dict[key] = new_value
                    return new_value
            else:
                # insert or replace
                self._dict[key] = value
                return value
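# A get-or-create usage sketch for ThreadLocalEntityCache.update; the Node
# class is illustrative, not part of the original:

cache = ThreadLocalEntityCache()


class Node(object):
    def __init__(self, ident):
        self.ident = ident


node = cache.update(42, lambda: Node(42))           # miss: constructed, inserted
assert cache.update(42, lambda: Node(42)) is node   # hit: constructor not called
cache.update(42, None)                              # remove
assert 42 not in cache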
# Task, EventReactor, EventReactorMixin and _logger are assumed to come from
# the surrounding module.
from concurrent.futures import ThreadPoolExecutor
from weakref import WeakValueDictionary


class WrappedThreadPoolExecutor(ThreadPoolExecutor, EventReactorMixin):
    '''Wraps a :class:`.ThreadPoolExecutor` that listens to a stop event'''

    def __init__(self, max_workers, event_reactor):
        ThreadPoolExecutor.__init__(self, max_workers)
        EventReactorMixin.__init__(self, event_reactor)
        event_reactor.register_handler(EventReactor.STOP_ID, self._stop_cb)
        self._task_map = WeakValueDictionary()

    def _stop_cb(self, event_id):
        _logger.debug('WrappedThreadPoolExecutor stopping everything')
        # Snapshot the values: entries can disappear mid-iteration as
        # finished tasks are garbage collected.
        for task in list(self._task_map.values()):
            task.stop()
        self.shutdown(wait=False)

    def submit(self, fn, *args, **kwargs):
        if isinstance(fn, Task):
            # Keyed by id(fn); the weak value lets finished tasks be collected.
            self._task_map[id(fn)] = fn
        return ThreadPoolExecutor.submit(self, fn, *args, **kwargs)
# The EV_* event-name constants are assumed to come from the surrounding module.
from weakref import WeakValueDictionary as Wkd


class EventManager(object):

    def __init__(self):
        self.listeners = Wkd()

    def add_listener(self, listener, name):
        self.listeners[name] = listener

    def remove_listener(self, name):
        # Listeners are keyed by name, so removal is by name as well.
        if name in self.listeners:
            del self.listeners[name]

    def post(self, event):
        en = event.name
        try:
            # All listeners
            if en in (EV_TICK, EV_QUIT, EV_INIT, EV_RESIZE):
                for val in self.listeners.values():
                    val.notify(event)
            # Game Engine only
            elif en in (EV_INPUT, EV_MOUSE_MOVE, EV_MOUSE_CLICK):
                self.listeners["Game Engine"].notify(event)
            # Renderer only
            elif en in (EV_PLAYER_MOVE, EV_PLAYER_STATS,
                        EV_MODEL_SHARE, EV_SWORD_SWING):
                self.listeners["Renderer"].notify(event)
        except KeyError as ke:
            print("Error:", ke)
# RPCError, Faults, RestarterFaults, the process-state constants and sets
# (STARTING, STOPPING, BACKOFF, RUNNING_STATES, STOPPED_STATES),
# _get_state_desc, Timer and NOT_DONE_YET are assumed to come from the
# surrounding supervisor-extension module.
from weakref import WeakValueDictionary


def restartProcessGroup(self, name):
    '''Restart all procs in a supervisor process group .. rapidly!
    Returns a list of rpc faults if an error occurs.

    @param string name  name of process group to restart
    @return boolean result  true if successful
    '''
    self._update('restartProcessGroup')
    group = self.supervisord.process_groups.get(name)
    if group is None:
        raise RPCError(RestarterFaults.BAD_GROUP)

    transit_states = (STARTING, STOPPING)
    processes = WeakValueDictionary(
        (p.config.name, p) for p in group.processes.itervalues())
    allprocs = set(processes.keys())
    procnames = [p.config.name for p in group.get_unstopped_processes()]
    unstopped = set(procnames)
    started = set()
    ignore = set()
    errs = list()
    timer = Timer()

    def get_proc(name):
        try:
            return processes[name]
        except KeyError:
            # Process vanished: forget about it everywhere.
            if name in procnames:
                procnames.remove(name)
            unstopped.discard(name)
            started.discard(name)
            ignore.discard(name)

    def restartem():
        loop_count = timer.inc_counter()
        # stagger_factor is how "often" we stop procs
        #   2 = every other call
        #   3 = every third call, etc
        stagger = min(self.stagger_factor or 1, len(unstopped) or 1)
        stop_modulus = loop_count % stagger

        if not timer.is_started():
            timer.start()
        elif timer.elapsed() > self.timeout:
            nremaining = (len(allprocs) - len(started.union(ignore))) + len(unstopped)
            e = RPCError(RestarterFaults.TIMEOUT,
                         'timeout expired after %.1f seconds, loop count %d, '
                         '%d procs pending restart'
                         % (timer.elapsed(), loop_count, nremaining))
            if errs:
                errs.append(e)
                return errs
            raise e

        for name in sorted(allprocs):
            p = get_proc(name)
            if p is None:
                continue
            if name not in unstopped and name not in started and name not in ignore:
                state = p.get_state()
                if state == BACKOFF:
                    if loop_count > 0:
                        errs.append(RPCError(
                            RestarterFaults.START_FAILED,
                            '%s: process failing startup, in backoff mode' % (name,)))
                        ignore.add(name)
                    else:
                        msg = p.stop()
                        if msg is not None:
                            errs.append(RPCError(
                                RestarterFaults.STOP_FAILED,
                                'BACKOFF/%s: %s' % (name, msg)))
                            ignore.add(name)
                elif state != STARTING and state in RUNNING_STATES:
                    started.add(name)
                elif state in STOPPED_STATES:
                    p.spawn()
                    if p.spawnerr:
                        errs.append(RPCError(Faults.SPAWN_ERROR, name))
                        ignore.add(name)
                elif state not in transit_states:
                    errs.append(RPCError(
                        RestarterFaults.BAD_STATE,
                        '%s: bad state during start [%s]'
                        % (name, _get_state_desc(state))))
                    ignore.add(name)

        for i, name in enumerate(sorted(unstopped, reverse=True)):
            if loop_count < stagger and (i % stagger) != stop_modulus:
                continue
            p = get_proc(name)
            if p is None:
                continue
            state = p.get_state()
            unstopped.discard(name)
            if state in RUNNING_STATES:
                msg = p.stop()
                if msg is not None:
                    errs.append(RPCError(
                        RestarterFaults.STOP_FAILED, '%s: %s' % (name, msg)))
                    ignore.add(name)
            elif state not in STOPPED_STATES and state not in transit_states:
                errs.append(RPCError(
                    Faults.BAD_STATE,
                    '%s: bad state during stop [%s]'
                    % (name, _get_state_desc(state))))
                ignore.add(name)

        if not unstopped and started.union(ignore) == allprocs:
            if errs:
                return errs
            return True
        return NOT_DONE_YET

    restartem.delay = self.delay
    restartem.rpcinterface = self
    return restartem
# Monitor is the package's base class.
from typing import Dict, List, Optional, Tuple, cast
from weakref import WeakValueDictionary


class CompoundMonitor(Monitor):
    """Combine (logical-and) multiple failures for emergency escalation.

    Check the most recent results of the provided monitors; if all of them
    failed, report a failure.
    """

    type = "compound"

    m = None  # type: Optional[WeakValueDictionary[str, Monitor]]
    mt = None  # type: Optional[WeakValueDictionary[str, Monitor]]

    def __init__(self, name: str, config_options: dict) -> None:
        super().__init__(name, config_options)
        self.monitors = cast(
            List[str],
            self.get_config_option(
                "monitors", required_type="[str]", required=True, default=[]
            ),
        )
        self.min_fail = cast(
            int,
            self.get_config_option(
                "min_fail", required_type="int", default=len(self.monitors), minimum=1
            ),
        )

    def run_test(self) -> bool:
        # we depend on the other tests to run, just check them
        failcount = self.min_fail
        # this check actually doesn't work, since the sub-monitors run AFTER
        # the compound ones...
        if self.m is not None:
            for i in self.monitors:
                if self.m[i].get_success_count() > 0 and self.m[i].tests_run > 0:
                    failcount -= 1
        if failcount < self.min_fail:
            return self.record_success(
                "{} monitors failed (min: {})".format(failcount, self.min_fail)
            )
        return self.record_fail(
            "{} monitors failed (min: {})".format(failcount, self.min_fail)
        )

    def describe(self) -> str:
        """Explain what we do."""
        return "Checking that these monitors all succeeded: {0}".format(
            ", ".join(self.monitors)
        )

    def get_params(self) -> Tuple:
        return (self.monitors,)

    def set_mon_refs(self, mmm: Dict[str, Monitor]) -> None:
        """Stash a (weak) ref to the global monitor list so we can examine it later."""
        self.all_monitors = WeakValueDictionary(mmm)

    def post_config_setup(self) -> None:
        """Make a nice little dict of just the monitors we need."""
        if self.m is not None:
            return
        self.m = WeakValueDictionary()
        for i in list(self.all_monitors.keys()):
            if i in self.monitors:
                self.m[i] = self.all_monitors[i]
        # make sure we find all of our monitors or die during config
        for i in self.monitors:
            if i not in self.m:
                raise RuntimeError("No such monitor %s in compound monitor" % i)

    def virtual_fail_count(self) -> int:
        failcount = self.fail_count()
        if failcount >= self.min_fail:
            # greater or equal number failed: return the real failure count
            return failcount
        # we don't count failures until the specified min_fail is reached
        return 0

    def fail_count(self) -> int:
        # count one failure for each sub-monitor that failed
        failcount = 0
        if self.m is not None:
            for i in self.monitors:
                if self.m[i].virtual_fail_count() > 0:
                    failcount += 1
        return failcount

    def get_result(self) -> str:
        failcount = self.fail_count()
        monitorcount = len(self.monitors)
        if failcount > 0:
            return "{0} of {1} services failed. Fail after: {2}".format(
                failcount, monitorcount, self.min_fail
            )
        return "All {0} services OK".format(monitorcount)
# AlreadyStored is assumed to be an exception defined in the surrounding module.
import os
from abc import ABC, abstractmethod
from collections import defaultdict
from glob import glob
from pathlib import Path
from weakref import WeakValueDictionary

import numpy as np


class AbstractCache(ABC):
    extensions = ['.none.none']

    def __init__(self, path=None, weak=False):
        if weak:
            self.data = WeakValueDictionary()
        else:
            self.data = dict()
        self.timestamps = defaultdict(lambda: np.inf)
        if path is not None:
            self.path = Path(path)
        else:
            self.path = None

    @abstractmethod
    def write(self, key: str, value):
        raise NotImplementedError

    @abstractmethod
    def read(self, key: str):
        raise NotImplementedError

    def __contains__(self, key: str):
        return key in self.ls()

    def __getitem__(self, key: str):
        if key not in self:
            raise KeyError(key)
        if self.path is None:
            return self.data[key]
        # Re-read from disk if the in-memory copy is missing or stale.
        if key not in self.data or self.timestamps[key] < self.get_timestamp(key):
            self.data[key] = self.read(key)
            self.timestamps[key] = self.get_timestamp(key)
        return self.data[key]

    def __setitem__(self, key: str, value):
        if key in self:
            raise AlreadyStored(key)
        self.data[key] = value
        if self.path is None:
            return
        self.write(key, value)
        self.timestamps[key] = self.get_timestamp(key)

    def __delitem__(self, key: str):
        if key in self.data:
            del self.data[key]
        if self.path is None:
            return
        if key in self.timestamps:
            del self.timestamps[key]
        self.remove_file(key)

    def get_timestamp(self, key: str):
        for ext in self.extensions:
            path = self.path / (key + ext)
            if path.exists():
                return os.path.getmtime(path)

    def remove_file(self, key: str):
        for ext in self.extensions:
            path = self.path / (key + ext)
            if path.exists():
                return os.remove(path)

    def ls(self):
        if self.path is None:
            return list(self.data.keys())
        paths = sum((glob(f'{self.path}/*{ext}') for ext in self.extensions), [])
        paths = sorted(paths, key=os.path.getmtime)
        names = [path[path.rfind('/') + 1:] for path in paths]
        names = [
            name[:name.find(ext)] if ext in name else None
            for ext in self.extensions
            for name in names
        ]
        names = [name for name in names if name is not None]
        return names

    def save(self, value, key: str):
        """backwards compatibility"""
        self[key] = value

    def load(self, key: str):
        """backwards compatibility"""
        return self[key]

    def remove(self, key: str):
        """backwards compatibility"""
        del self[key]
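# A minimal concrete subclass, as a sketch; the PickleCache name and the
# pickle format are illustrative, not from the original. Note that with
# weak=True only weak-referenceable values can be stored.

import pickle


class PickleCache(AbstractCache):
    extensions = ['.pkl']

    def write(self, key, value):
        with open(self.path / (key + '.pkl'), 'wb') as f:
            pickle.dump(value, f)

    def read(self, key):
        with open(self.path / (key + '.pkl'), 'rb') as f:
            return pickle.load(f)


cache = PickleCache(path='/tmp/cache')   # the directory must already exist
cache['answer'] = 42
assert cache.load('answer') == 42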
# Variable, var_g and eval_code come from the surrounding module; `parser`
# is the legacy stdlib parser module (removed in Python 3.10).
import parser
from weakref import WeakValueDictionary as WVD

import numpy as np


class ExpressionVariable(Variable):
    def __init__(self, expr, ind_vars, complex=False):
        super(ExpressionVariable, self).__init__(complex=complex)
        st = parser.expr(expr)
        code = st.compile('<string>')
        names = code.co_names
        self.co = code
        self.names = names
        self.expr = expr
        self.ind_vars = ind_vars
        self.variables = WVD()

    def __repr__(self):
        return "Expression(" + self.expr + ")"

    def set_point(self, T, ip, g, l, t=None):
        self.x = T.Transform(ip)
        for n in self.names:
            if n in g and isinstance(g[n], Variable):
                g[n].set_point(T, ip, g, l, t=t)
                self.variables[n] = g[n]

    def __call__(self, **kwargs):
        l = {}
        for k, name in enumerate(self.ind_vars):
            l[name] = self.x[k]
        for k in self.variables.keys():
            l[k] = self.variables[k]()
        return eval_code(self.co, var_g, l)

    def get_emesh_idx(self, idx=None, g=None):
        if idx is None:
            idx = []
        for n in self.names:
            if n in g and isinstance(g[n], Variable):
                idx = g[n].get_emesh_idx(idx=idx, g=g)
        return idx

    def nodal_values(self, iele=None, el2v=None, locs=None,
                     wverts=None, elvertloc=None, g=None, **kwargs):
        size = len(wverts)
        dtype = complex if self.complex else float
        ret = np.zeros(size, dtype=dtype)

        # Mark the vertices actually touched by the elements.
        for kk, m, loc in zip(iele, el2v, elvertloc):
            if kk < 0:
                continue
            for pair, xyz in zip(m, loc):
                idx = pair[1]
                ret[idx] = 1

        l = {}
        ll_name = []
        ll_value = []
        var_g2 = var_g.copy()
        for n in self.names:
            if n in g and isinstance(g[n], Variable):
                l[n] = g[n].nodal_values(iele=iele, el2v=el2v, locs=locs,
                                         wverts=wverts, elvertloc=elvertloc,
                                         g=g, **kwargs)
                ll_name.append(n)
                ll_value.append(l[n])
            elif n in g:
                var_g2[n] = g[n]

        if len(ll_name) > 0:
            value = np.array([eval(self.co, var_g2, dict(zip(ll_name, v)))
                              for v in zip(*ll_value)])
        else:
            for k, name in enumerate(self.ind_vars):
                l[name] = locs[..., k]
            value = np.array(eval_code(self.co, var_g2, l), copy=False)
            if value.ndim > 1:
                value = np.stack([value] * size)

        from petram.helper.right_broadcast import multi
        ret = multi(ret, value)
        return ret

    def ncface_values(self, ifaces=None, irs=None, gtypes=None, g=None,
                      attr1=None, attr2=None, locs=None, **kwargs):
        size = len(locs)
        dtype = complex if self.complex else float
        ret = np.zeros(size, dtype=dtype)

        l = {}
        ll_name = []
        ll_value = []
        var_g2 = var_g.copy()
        for n in self.names:
            if n in g and isinstance(g[n], Variable):
                l[n] = g[n].ncface_values(ifaces=ifaces, irs=irs,
                                          gtypes=gtypes, locs=locs,
                                          attr1=attr1, attr2=attr2,
                                          g=g, **kwargs)
                ll_name.append(n)
                ll_value.append(l[n])
            elif n in g:
                var_g2[n] = g[n]

        if len(ll_name) > 0:
            value = np.array([eval(self.co, var_g2, dict(zip(ll_name, v)))
                              for v in zip(*ll_value)])
        else:
            for k, name in enumerate(self.ind_vars):
                l[name] = locs[..., k]
            value = np.array(eval_code(self.co, var_g2, l), copy=False)
            if value.ndim > 1:
                value = np.stack([value] * size)

        return value
# Node, Handler (a module exposing a Handler class and the CLAIM/MATCH/
# NO_MATCH/FALLBACK constants), HandlerError, Space, logger and set_root are
# assumed to come from the surrounding package.
from weakref import WeakValueDictionary


class BaseNode(object):
    _handlers = []

    @classmethod
    def addHandler(cls, handler):
        assert isinstance(handler, Handler.Handler)
        cls._handlers.insert(0, handler)  # LIFO

    @classmethod
    def removeHandler(cls, handler):
        assert isinstance(handler, Handler.Handler)
        cls._handlers.remove(handler)

    @classmethod
    def getHandlers(cls):
        return cls._handlers

    def __init__(self, nodetype, root, parent, name):
        self.nodetype = nodetype
        if root is None and parent is None:
            root = self
        self.root = root
        self.parent = parent
        self.name = name
        self.space = Space()
        self._children = {}
        self._links = WeakValueDictionary()
        self._children_names = []
        self._myhandlers = None
        self._lock_children = False
        self.currentAction = None
        self.assigned = list(self.__dict__.keys())

    def __setattr__(self, attr, value):
        if hasattr(self, "assigned"):
            if attr not in self.assigned:
                raise AttributeError(attr)
        self.__dict__[attr] = value

    def getChildren(self):
        return self._children_names

    def getLinks(self):
        return self._links.keys()

    def hasChild(self, name):
        return name in self._children

    def addChild(self, nodetype, name):
        self.currentAction = "addChild"
        logger("addChild", self.nodetype, self.name, nodetype, name)
        assert not self._lock_children
        assert name not in self._children, name
        self._lock_children = True
        child = None
        try:
            child = Node(nodetype, self.root, self, name)
            child.sendMessage("init")
            self._children[name] = child
            self._children_names.append(name)
        finally:
            self._lock_children = False
        child.sendMessage("post-init")
        self.currentAction = None
        return child

    def attachChild(self, child, name, position=None):
        self.currentAction = "attachChild"
        logger("attachChild", self.nodetype, self.name, name,
               child.nodetype, position)
        assert not self._lock_children
        assert name not in self._children, name
        self._lock_children = True
        try:
            self._children[name] = child
            if position is None:
                self._children_names.append(name)
            else:
                self._children_names.insert(position, name)
                for n in range(position + 1, len(self._children)):
                    childname = self._children_names[n]
                    self._children[childname].sendMessage("child-index-increase")
        finally:
            self._lock_children = False
        child.parent = self
        child.name = name
        set_root(child, self.root)
        child.sendMessage("attach")
        self.currentAction = None
        return child

    def attachLink(self, link, name):
        self.currentAction = "attachLink"
        logger("attachLink", self.nodetype, self.name, name, link.nodetype)
        assert name not in self._children, name
        assert name not in self._links, name
        self._links[name] = link
        self.currentAction = None
        return link

    def insertChild(self, nodetype, name, position):
        self.currentAction = "insertChild"
        logger("insertChild", self.nodetype, self.name, nodetype, name, position)
        assert not self._lock_children
        assert name not in self._children, name
        self._lock_children = True
        child = None
        try:
            child = Node(nodetype, self.root, self, name)
            child.sendMessage("init")
            self._children[name] = child
            self._children_names.insert(position, name)
            for n in range(position + 1, len(self._children)):
                childname = self._children_names[n]
                self._children[childname].sendMessage("child-index-increase")
        finally:
            self._lock_children = False
        child.sendMessage("post-init")
        self.currentAction = None
        return child

    def removeChild(self, name):
        self.currentAction = "removeChild"
        logger("removeChild", self.nodetype, self.name, name)
        assert not self._lock_children
        assert name in self._children, name
        self._lock_children = True
        child = None
        try:
            position = self._children_names.index(name)
            child = self._children[name]
            child.sendMessage("destroy")
            self._children_names.remove(name)
            self._children.pop(name)
            for n in range(position, len(self._children)):
                childname = self._children_names[n]
                self._children[childname].sendMessage("child-index-decrease")
        finally:
            self._lock_children = False
        child.sendMessage("post-destroy")
        self.currentAction = None

    def detachChild(self, name):
        self.currentAction = "detachChild"
        logger("detachChild", self.nodetype, self.name, name)
        assert not self._lock_children
        assert name in self._children, name
        self._lock_children = True
        child = None
        try:
            position = self._children_names.index(name)
            child = self._children[name]
            child.sendMessage("pre-detach")
            child.parent = None
            child.name = None
            self._children_names.remove(name)
            self._children.pop(name)
            for n in range(position, len(self._children)):
                childname = self._children_names[n]
                self._children[childname].sendMessage("child-index-decrease")
            child.sendMessage("post-detach")
        finally:
            self._lock_children = False
        self.currentAction = None
        return child

    def detachAndReplaceChild(self, name, newchild):
        self.currentAction = "detachAndReplaceChild"
        logger("detachAndReplaceChild", self.nodetype, self.name, name,
               newchild.nodetype)
        assert not self._lock_children
        assert newchild.parent is None
        assert newchild.name is None
        assert name in self._children, name
        self._lock_children = True
        child = None
        try:
            position = self._children_names.index(name)
            child = self._children[name]
            child.sendMessage("pre-detach")
            child.parent = None
            child.name = None
            self._children[name] = newchild
            newchild.parent = self
            newchild.name = name
            set_root(newchild, self.root)
            newchild.sendMessage("attach")
            child.sendMessage("post-detach")
        finally:
            self._lock_children = False
        self.currentAction = None
        return child

    def replaceChild(self, name, newchild):
        self.currentAction = "replaceChild"
        logger("replaceChild", self.nodetype, self.name, name, newchild.nodetype)
        assert not self._lock_children
        assert newchild.parent is None
        assert newchild.name is None
        assert name in self._children, name
        self._lock_children = True
        child = None
        try:
            position = self._children_names.index(name)
            child = self._children[name]
            child.sendMessage("destroy")
            self._children[name] = newchild
            newchild.parent = self
            newchild.name = name
            set_root(newchild, self.root)
            newchild.sendMessage("attach")
            child.sendMessage("post-destroy")
        finally:
            self._lock_children = False
        self.currentAction = None
        return child

    def sendMessage(self, message, args=()):
        self.currentAction = "sendMessage"
        logger("sendMessage", self.nodetype, self.name, message, args)
        if self._myhandlers is None:
            self._buildMyHandlers()
        try:
            matched = False
            while 1:
                matches = []
                fallbacks = []
                claimed = False
                for handler in self._myhandlers:
                    if handler.nodetype is not None and handler.nodetype != self.nodetype:
                        continue  # TODO: see buildMyHandlers
                    if handler.message is not None and handler.message != message:
                        continue
                    poll = handler.poll(message, self.nodetype)
                    if poll == Handler.CLAIM:
                        result = handler.invoke(message, self, args)
                        assert result != Handler.NO_MATCH
                        claimed = True
                        break
                    elif poll == Handler.MATCH:
                        matches.append(handler)
                    elif poll == Handler.NO_MATCH:
                        continue
                    elif poll == Handler.FALLBACK:
                        fallbacks.append(handler)
                    else:
                        raise Exception("Unknown handler poll result", handler, poll)
                if claimed:
                    matched = True
                    break
                for handler in matches:
                    result = handler.invoke(message, self, args)
                    if result == Handler.CLAIM:
                        matched = True
                        break
                    elif result == Handler.MATCH:
                        matched = True
                        continue
                    elif result == Handler.NO_MATCH:
                        continue
                    else:
                        raise HandlerError("Unknown handler invoke result",
                                           handler._invokefunc, result)
                if matched:
                    break
                for handler in fallbacks:
                    result = handler.invoke(message, self, args)
                    if result == Handler.CLAIM:
                        matched = True
                        break
                    elif result == Handler.MATCH:
                        matched = True
                        continue
                    elif result == Handler.NO_MATCH:
                        continue
                    else:
                        raise HandlerError("Unknown handler invoke result",
                                           handler, result)
                break
            if not matched:
                raise HandlerError("No handler for this message",
                                   message, self.nodetype, self.name)
        finally:
            self.currentAction = None

    def morph(self, newnodetype):
        self.currentAction = "morph"
        logger("morph", self.nodetype, self.name, newnodetype)
        self._myhandlers = None
        oldnodetype = self.nodetype
        self.nodetype = newnodetype
        try:
            for childname in list(self._children.keys()):
                if childname not in self._children:
                    continue  # may have been removed in the meantime...
                self._children[childname].sendMessage(
                    "parent-morph", (oldnodetype, newnodetype))
            if self.parent is not None:
                self.parent.sendMessage(
                    "child-morph", (self.name, oldnodetype, newnodetype))
        finally:
            self.currentAction = None

    def _buildMyHandlers(self):
        myhandlers = []
        nodetype = self.nodetype
        """
        for handler in self._handlers:
            if handler.nodetype is not None and handler.nodetype != nodetype:
                continue
            myhandlers.append(handler)
        self._myhandlers = myhandlers
        """
        self._myhandlers = self._handlers

    def __getitem__(self, item):
        try:
            return self._children[item]
        except KeyError:
            if item not in self._links:
                raise
            return self._links[item]
import os
from weakref import WeakValueDictionary as Weak

import pyopencl
from pyopencl import mem_flags as mf


class BufferManager(object):
    MEMFLAGS = mf.READ_WRITE | mf.COPY_HOST_PTR
    READ = 0
    WRITE = 1
    DEBUG = True

    def __init__(self, engine=None, context=None, queue=None):
        if engine is None and context is None:
            # Look in the environment for engine selection
            engine = os.environ.get("ENGINE", None)
            assert engine is not None
            engine = int(engine)  # environment values are strings
        self.ctx = context
        self.queue = queue
        if self.ctx is None:
            self.ctx = pyopencl.Context(
                [pyopencl.get_platforms()[0].get_devices()[engine]])
            self.queue = None
        if self.queue is None:
            self.queue = pyopencl.CommandQueue(self.ctx)
        # Buffer management
        self.arrays = Weak()
        self.buffers = {}
        self.hits = self.misses = 0
        self.purged = 0

    def purgeBuffers(self):
        # Drop buffers no live array is referencing any more.
        ids = set(self.arrays.keys())
        for aid in list(self.buffers.keys()):
            if aid not in ids:
                del self.buffers[aid]
                self.purged += 1

    def makeBuffer(self, a):
        buf = pyopencl.Buffer(self.ctx, self.MEMFLAGS, hostbuf=a)
        aid = id(a)
        self.arrays[aid] = a
        self.buffers[aid] = buf
        return buf

    def ensureBuffer(self, a):
        buf = self.findBuffer(a, self.WRITE)
        if buf is None:
            buf = self.makeBuffer(a)
        return buf

    def readBuffer(self, a):
        buf = self.findBuffer(a, self.READ)
        # Temporarily flatten the array so the copy sees contiguous memory.
        shape = a.shape
        strides = a.strides
        a.shape = (a.size,)
        a.strides = (strides[-1],)
        pyopencl.enqueue_copy(self.queue, a, buf).wait()
        a.shape = shape
        a.strides = strides
        return buf

    def writeBuffer(self, a):
        buf = self.ensureBuffer(a)
        shape = a.shape
        strides = a.strides
        a.shape = (a.size,)
        a.strides = (strides[-1],)
        pyopencl.enqueue_copy(self.queue, buf, a).wait()
        a.shape = shape
        a.strides = strides
        return buf

    def findBuffer(self, a, op):
        "Find an appropriate buffer. Tricky."
        assert op in (self.READ, self.WRITE)
        self.purgeBuffers()
        aid = id(a)
        # Complete match, easy decision.
        if aid in self.buffers:
            self.hits += 1
            return self.buffers[aid]
        self.misses += 1
        # No match at all, also easy: reading an array back with no matching
        # buffer is fatal.
        if op == self.READ:
            raise ValueError("Array not in yapocis management, you may have "
                             "written to it, or be using an assigned or .copy")
        return None
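# A usage sketch, assuming an OpenCL platform with a device at index 0 is
# available; device selection and yapocis semantics as in the class above.

import numpy as np

bm = BufferManager(engine=0)
a = np.arange(16, dtype=np.float32)
bm.ensureBuffer(a)     # create (or reuse) a device buffer for `a`
a *= 2
bm.writeBuffer(a)      # push host changes to the device
bm.readBuffer(a)       # pull device contents back into `a`
print(bm.hits, bm.misses, bm.purged)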
# Field, Edge and Group are assumed to come from the surrounding module.
from weakref import WeakValueDictionary


class Entity:

    def __init__(self, entType, entValue, entField):
        if isinstance(entField, Field):
            self.type = entType
            self.value = entValue
            self.field = entField
            self.group = None
            self.links = WeakValueDictionary()  # dict of linked entities
            self.field.registerEntity(self)     # update the entity registry
        else:
            raise TypeError("Invalid field argument, Field instance expected!")

    def linkTo(self, eTwo):
        '''Linking is bi-directional and affects both entities equally.'''
        # check that the entities are not already linked
        if Edge.linkId(self, eTwo) not in self.links:
            # update both entities' lists of links by creating a new edge
            newlink = Edge(self, eTwo, self.field)
            self.links[newlink.id] = eTwo
            eTwo.links[newlink.id] = self
            if self.group is None:
                # the first entity's group is not set; assume the second
                # entity already has a group assigned
                try:
                    eTwo.group.addMember(self)
                except AttributeError:
                    # the second entity has no group either
                    newGroup = Group(self.field)
                    newGroup.addMember(self)
                    newGroup.addMember(eTwo)
            elif eTwo.group is None:
                # the first entity's group is set, the second one's is not
                self.group.addMember(eTwo)
            elif self.group.name != eTwo.group.name:
                # both entities have groups set, and they are different groups
                if self.group.size > eTwo.group.size:
                    # first group wins
                    self.group.annexMembers(eTwo.group)
                else:
                    # second group wins
                    eTwo.group.annexMembers(self.group)

    def getLinks(self):
        '''Return the entities directly linked.'''
        return self.links.values()

    def removeLink(self, eTwo):
        '''Remove a linked entity.'''
        linkId = Edge.linkId(self, eTwo)
        self.links.pop(linkId)

    def __repr__(self):
        return repr(self.value)

    def __del__(self):
        '''Delete the edges to linked entities. No cleanup is needed on the
        other side because the links themselves are weak.'''
        for linkId in list(self.links.keys()):
            self.field.eliminateEdge(linkId)
# MAX_TASK_AGE, interval_runner, delay_runner and succeed are assumed to come
# from the surrounding module.
import logging
import time
from asyncio import (CancelledError, Future, Task, coroutine, ensure_future,
                     gather, iscoroutinefunction)
from contextlib import suppress
from threading import RLock
from weakref import WeakValueDictionary


class TaskManager(object):
    """
    Provides a set of tools to maintain a list of asyncio Tasks that are to
    be executed during the lifetime of an arbitrary object, usually getting
    killed with it.
    """

    def __init__(self):
        self._pending_tasks = WeakValueDictionary()
        self._task_lock = RLock()
        self._shutdown = False
        self._counter = 0
        self._logger = logging.getLogger(self.__class__.__name__)

        self._checker = self.register_task('_check_tasks', self._check_tasks,
                                           interval=MAX_TASK_AGE,
                                           delay=MAX_TASK_AGE * 1.5)

    def _check_tasks(self):
        now = time.time()
        # Snapshot the items: weak entries can vanish during iteration.
        for name, task in list(self._pending_tasks.items()):
            if not task.interval and now - task.start_time > MAX_TASK_AGE:
                self._logger.warning(
                    'Non-interval task "%s" has been running for %.2f!',
                    name, now - task.start_time)

    def replace_task(self, name, *args, **kwargs):
        """
        Replace a named task with a new one, cancelling the old one in the
        process.
        """
        new_task = Future()

        def cancel_cb(_):
            try:
                new_task.set_result(self.register_task(name, *args, **kwargs))
            except Exception as e:
                new_task.set_exception(e)

        old_task = self.cancel_pending_task(name)
        old_task.add_done_callback(cancel_cb)
        return new_task

    def register_task(self, name, task, *args, delay=None, interval=None, ignore=()):
        """
        Register a Task/(coroutine)function so it can be canceled at shutdown
        time or by name.
        """
        if not isinstance(task, Task) and not iscoroutinefunction(task) and not callable(task):
            raise ValueError('register_task takes a Task or a (coroutine)function as a parameter')
        if (interval or delay) and isinstance(task, Task):
            raise ValueError('Cannot run Task at an interval or with a delay')
        if not isinstance(ignore, tuple) or not all(issubclass(e, Exception) for e in ignore):
            raise ValueError('Ignore should be a tuple of Exceptions or None')

        with self._task_lock:
            if self._shutdown:
                self._logger.warning("Not adding task %s due to shutdown!", str(task))
                if isinstance(task, (Task, Future)):
                    if not task.done():
                        task.cancel()
                return task

            if self.is_pending_task_active(name):
                raise RuntimeError("Task already exists: '%s'" % name)

            if iscoroutinefunction(task) or callable(task):
                task = task if iscoroutinefunction(task) else coroutine(task)
                if interval:
                    # The default delay for looping calls is the same as the interval
                    delay = interval if delay is None else delay
                    task = ensure_future(interval_runner(delay, interval, task, *args))
                elif delay:
                    task = ensure_future(delay_runner(delay, task, *args))
                else:
                    task = ensure_future(task(*args))

            # Since weak references to list/tuple are not allowed, we're not
            # storing start_time/interval in _pending_tasks. Instead we add
            # them as attributes to the task.
            task.start_time = time.time()
            task.interval = interval

            assert isinstance(task, Task)

            def done_cb(future):
                self._pending_tasks.pop(name, None)
                try:
                    future.result()
                except CancelledError:
                    pass
                except ignore as e:
                    self._logger.error('Task resulted in error: %s', e)

            self._pending_tasks[name] = task
            task.add_done_callback(done_cb)
            return task

    def register_anonymous_task(self, basename, task, *args, **kwargs):
        """
        Wrapper for register_task to derive a unique name from the basename.
        """
        self._counter += 1
        return self.register_task(basename + ' ' + str(self._counter),
                                  task, *args, **kwargs)

    def cancel_pending_task(self, name):
        """
        Cancels the named task.
        """
        with self._task_lock:
            task = self._pending_tasks.get(name, None)
            if not task:
                return succeed(None)

            if not task.done():
                task.cancel()
            self._pending_tasks.pop(name, None)
            return task

    def cancel_all_pending_tasks(self):
        """
        Cancels all the registered tasks.
        This usually should be called when stopping or destroying the object
        so no tasks are left floating around.
        """
        with self._task_lock:
            assert all(isinstance(t, (Task, Future))
                       for t in self._pending_tasks.values()), self._pending_tasks
            return [self.cancel_pending_task(name)
                    for name in list(self._pending_tasks.keys())]

    def is_pending_task_active(self, name):
        """
        Return a boolean determining if a task is active.
        """
        with self._task_lock:
            task = self._pending_tasks.get(name, None)
            return not task.done() if task else False

    def get_tasks(self):
        """
        Returns a list of all registered tasks, excluding tasks that are
        created by the TaskManager itself.
        """
        with self._task_lock:
            return [t for t in self._pending_tasks.values() if t != self._checker]

    async def wait_for_tasks(self):
        """
        Waits until all registered tasks are done.
        """
        with self._task_lock:
            tasks = self.get_tasks()
        # Don't hold the (threading) lock across the await.
        if tasks:
            await gather(*tasks, return_exceptions=True)

    async def shutdown_task_manager(self):
        """
        Clear the task manager, cancel all pending tasks and disallow new
        tasks being added.
        """
        with self._task_lock:
            self._shutdown = True
            tasks = self.cancel_all_pending_tasks()
        # Don't hold the (threading) lock across the await.
        if tasks:
            with suppress(CancelledError):
                await gather(*tasks)
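# A minimal usage sketch, assuming the module-level helpers the class relies
# on (MAX_TASK_AGE, interval_runner, delay_runner, succeed) are importable
# alongside it:

import asyncio


class Service(TaskManager):
    def __init__(self):
        super().__init__()
        self.ticks = 0
        self.register_task('tick', self.tick, interval=0.1)

    def tick(self):
        self.ticks += 1


async def main():
    service = Service()
    await asyncio.sleep(0.35)
    await service.shutdown_task_manager()   # cancels 'tick' and the checker
    print(service.ticks)

asyncio.run(main())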
from weakref import WeakValueDictionary


class NmTensorNameRegistry:
    def __init__(self):
        """
        Constructor. Initializes the NmTensorNameRegistry and reserves the
        default 'loss' name.

        TODO: We should be recording the tensors of each graph rather than
        all the tensors.
        """
        # Create nmtensor_naming_dict, which maps user-facing names to
        # NmTensor.unique_name. Reserve the key 'loss'.
        self._nmtensor_naming_dict = {"loss": "loss"}
        # Create a dict that maps unique_names to tensors for use with
        # TrainingState.get_tensor()
        self._nmtensor_uniname_dict = WeakValueDictionary()

    @property
    def unique_names(self):
        """Returns a list of all NmTensor unique_names plus 'loss'."""
        return list(self._nmtensor_uniname_dict.keys()) + ["loss"]

    def register(self, tensor: 'NmTensor'):
        """Helper function to register a newly created NmTensor by adding it
        to self._nmtensor_uniname_dict. Should be called from
        NmTensor.__init__().

        args:
            tensor (NmTensor): The tensor to be registered.
        """
        # Re-registering an existing unique_name simply overwrites the entry.
        self._nmtensor_uniname_dict[tensor.unique_name] = tensor

    def rename_NmTensor(self, tensor: 'NmTensor', new_name: str):
        """Helper function that changes the naming dictionary to facilitate
        user name -> tensor.unique_name lookup.

        args:
            tensor (NmTensor): The tensor to be renamed.
            new_name (str): its new name.
        """
        # Find the old name, if one exists
        old_name = tensor.unique_name
        for custom_name, unique_name in self._nmtensor_naming_dict.items():
            if unique_name == tensor.unique_name:
                old_name = custom_name

        if old_name != tensor.unique_name:
            del self._nmtensor_naming_dict[old_name]

        if new_name in self._nmtensor_naming_dict:
            raise KeyError(f"{new_name} already exists in the current graph. "
                           "Please use a unique name")
        self._nmtensor_naming_dict[new_name] = tensor.unique_name

    def __getitem__(self, key: str):
        """
        Object getter function.

        Args:
            key: Object name.

        Returns:
            Object associated with the key.
        """
        # Search for an object with the given name.
        if key in self._nmtensor_naming_dict:
            key = self._nmtensor_naming_dict[key]

        if key in self._nmtensor_uniname_dict or key == "loss":
            return key
        raise KeyError("A NmTensor with name `{}` does not exist!".format(key))
# MemoryException, MMapAnon, MMapFile and `logger` are assumed to come from
# the surrounding module (Python 2 code: xrange/long).
from weakref import WeakValueDictionary


class Memory(object):
    """
    The memory manager.
    This class handles all virtual memory mappings and symbolic chunks.
    """

    def __init__(self, addressbitsize=32, pagebitsize=12):
        '''
        Builds a memory chunk.
        @param addressbitsize: size in bits of the address space (default=32).
        @param pagebitsize: size in bits of the page boundary (default=12).
        '''
        assert addressbitsize in [16, 32, 64], "Not supported address bit size"
        assert pagebitsize in [12, 13], "Not supported page bit size"
        self.addressbitsize = addressbitsize
        self.pagebitsize = pagebitsize
        self.maps = set()
        self.page2map = WeakValueDictionary()   # {page -> ref{MAP}}

    def _ceil(self, address):
        """
        Returns the smallest page boundary value not less than the address.
        @rtype: int
        @param address: the address to calculate its ceil.
        @return: the ceil of C{address}.
        """
        pagemask = (1 << self.pagebitsize) - 1
        addrmask = (1 << self.addressbitsize) - 1
        return ((address | pagemask) + 1) & addrmask

    def _floor(self, address):
        """
        Returns the largest page boundary value not greater than the address.
        @rtype: int
        @param address: the address to calculate its floor.
        @return: the floor of C{address}.
        """
        pagemask = (1 << self.pagebitsize) - 1
        return address & ~pagemask

    def _page(self, address):
        """
        Calculates the page number of an address.
        @rtype: int
        @param address: the address to calculate its page number.
        @return: the page number of C{address}.
        """
        return address >> self.pagebitsize

    def _search(self, size, start=0x10000000, counter=0):
        """
        Recursively searches the address space for enough free space to
        allocate C{size} bytes.
        @rtype: int
        @param size: the size in bytes to allocate.
        @param start: an address from where to start the search.
        @param counter: internal parameter to know if all the memory was
                        already scanned.
        @return: the address of an available space to map C{size} bytes.
        @raise MemoryException: if there is no space available to allocate
                                the desired memory.
        @todo: Document what happens when you try to allocate something that
               wraps around the 32/64 bit address representation.
        """
        if counter > 1 << self.addressbitsize:
            raise MemoryException("Not enough memory")
        # Alloc starting in the second page in case of overflow.
        if start + size > 1 << self.addressbitsize:
            start = 1 << self.pagebitsize
        for p in xrange(self._page(start), self._page(self._ceil(start + size - 1))):
            if p in self.page2map:
                return self._search(size, start=self.page2map[p].end,
                                    counter=counter + self.page2map[p].end - start)
        assert start + size <= (1 << self.addressbitsize)
        return start

    def mmapFile(self, addr, size, perms, filename, offset=0):
        """
        Creates a new file mapping in the memory address space.
        @rtype: int
        @param addr: the starting address (taken as a hint). If C{addr} is
                     C{0}, the first big enough chunk of memory will be
                     selected as the starting address.
        @param size: the contents of the file mapping are initialized using
                     C{size} bytes starting at offset C{offset} in the file
                     C{filename}.
        @param perms: the access permissions to this memory.
        @param filename: the pathname to the file to map.
        @param offset: the offset in the file from which to initialize the
                       mapping.
        @return: the starting address where the file was mapped.
        @raise error:
            - "Address shall be concrete" if C{addr} is not an integer number.
            - "Address too big" if C{addr} goes beyond the limit of the memory.
            - "Map already used" if the piece of memory starting at C{addr}
              with length C{size} isn't free.
        """
        # If addr is NULL, the system determines where to allocate the region.
        if addr is None:
            addr = 0x10000000
        assert type(addr) in [int, long], "Address shall be concrete"
        assert addr <= ((1 << self.addressbitsize) - 1), "Address too big"

        # The address is rounded down to the nearest multiple of the
        # allocation granularity; the size is rounded up to the next page
        # boundary.
        addr = self._floor(addr)
        size = self._ceil(size - 1)

        # Search for a spot.
        addr = self._search(size, addr)

        # It should not be allocated.
        for i in xrange(self._page(addr), self._page(addr + size)):
            assert i not in self.page2map, "Map already used"

        # Create the file map.
        m = MMapFile(addr, size, perms, filename, offset,
                     addressbitsize=self.addressbitsize,
                     pagebitsize=self.pagebitsize)

        # Okay, ready to alloc.
        self.maps.add(m)
        # Update the page-to-map translation.
        for i in range(self._page(m.start), self._page(m.end)):
            self.page2map[i] = m
        return addr

    def mmap(self, addr, size, perms, data_init=None):
        """
        Creates a new mapping in the memory address space.
        @rtype: int
        @param addr: the starting address (taken as a hint). If C{addr} is
                     C{0}, the first big enough chunk of memory will be
                     selected as the starting address.
        @param size: the length of the mapping.
        @param perms: the access permissions to this memory.
        @param data_init: optional data to initialize this memory.
        @return: the starting address where the memory was mapped.
        @raise error:
            - "Address shall be concrete" if C{addr} is not an integer number.
            - "Address too big" if C{addr} goes beyond the limit of the memory.
            - "Map already used" if the piece of memory starting at C{addr}
              with length C{size} isn't free.
        """
        # If addr is NULL, the system determines where to allocate the region.
        if addr is None:
            addr = 0x10000000
        assert type(addr) in [int, long], "Address shall be concrete"
        assert addr <= ((1 << self.addressbitsize) - 1), "Address too big"

        # The address is rounded down to the nearest multiple of the
        # allocation granularity; the size is rounded up to the next page
        # boundary.
        addr = self._floor(addr)
        size = self._ceil(size - 1)

        # Search for a spot.
        addr = self._search(size, addr)

        # It should not be allocated.
        for i in xrange(self._page(addr), self._page(addr + size)):
            assert i not in self.page2map, "Map already used"

        # Create the anonymous map.
        m = MMapAnon(start=addr, size=size, perms=perms, data_init=data_init,
                     addressbitsize=self.addressbitsize,
                     pagebitsize=self.pagebitsize)

        # Okay, ready to alloc.
        self.maps.add(m)
        # Update the page-to-map translation.
        for i in range(self._page(m.start), self._page(m.end)):
            self.page2map[i] = m
        logger.debug("New memory map @%x size:%x", addr, size)
        return addr

    def mappings(self):
        """
        Returns a sorted list of all the mappings for this memory.
        @rtype: list
        @return: a list of mappings.
        """
        result = []
        for m in self.maps:
            if isinstance(m, MMapAnon):
                result.append((m.start, m.end, m.perms, 0, ''))
            elif isinstance(m, MMapFile):
                result.append((m.start, m.end, m.perms, m.offset, m.filename))
            else:
                result.append((m.start, m.end, m.perms, 0, ''))
        return sorted(result)

    def __str__(self):
        return '\n'.join(["%016x-%016x %s %08x %s"
                          % (start, end, p, offset, filename)
                          for start, end, p, offset, filename in self.mappings()])

    def munmap(self, start, size):
        """
        Deletes the mappings for the specified address range and causes
        further references to addresses within the range to generate invalid
        memory references.
        @param start: the starting address to delete.
        @param size: the length of the unmapping.
        """
        start = self._floor(start)
        size = self._ceil(size - 1)

        # Select all mappings that have at least 1 byte unmapped.
        affected = set()
        p = self._page(start)
        while p < self._page(self._ceil(start + size)):
            if p in self.page2map:
                m = self.page2map[p]
                affected.add(m)
                p = self._page(m.end)
            else:
                p += 1

        new_maps = []
        for m in affected:
            # Remove m's pages from page2map and m from the maps set.
            for p in xrange(self._page(m.start), self._page(m.end)):
                del self.page2map[p]
            self.maps.remove(m)
            # Unmap the range from m, possibly generating 0, 1 or 2 new maps.
            new_maps += m.unmap(start, size)

        # Reattach the newly generated maps (there may be none).
        for nm in new_maps:
            self.maps.add(nm)
            for p in xrange(self._page(nm.start), self._page(nm.end)):
                self.page2map[p] = nm
        logger.debug("Unmap memory @%x size:%x", start, size)

    def mprotect(self, start, size, perms):
        '''
        Changes the access permissions of the memory mapped in the specified
        range.
        @param start: start range address.
        @param size: size of the range.
        @param perms: new permissions for the memory within the range.
        @todo: fix the failure mode: return True/False or raise an exception?
        @todo: check perms and what happens if they equal the existing perms.
        '''
        start = self._floor(start)
        end = self._ceil(start + size - 1)
        size = end - start

        # Select all mappings that have at least 1 byte mprotected.
        affected = set()
        p = self._page(start)
        while p < self._page(end):
            if p in self.page2map:
                m = self.page2map[p]
                affected.add(m)
                p = self._page(m.end)
            else:
                p += 1

        new_maps = []
        for m in affected:
            # Remove m's pages from page2map and m from the maps set.
            for p in xrange(self._page(m.start), self._page(m.end - 1)):
                del self.page2map[p]
            self.maps.remove(m)
            # Re-protect the range in m, possibly generating 0, 1 or 2 new maps.
            new_maps += m.mprotect(start, size, perms)

        # Reattach the newly generated maps (there may be none).
        for nm in new_maps:
            self.maps.add(nm)
            for p in xrange(self._page(nm.start), self._page(nm.end)):
                self.page2map[p] = nm
        logger.debug("Change perms to memory @%x size:%x newperms: %s",
                     start, size, perms)

    def _getMap(self, address):
        """
        Returns the L{MMap} object containing the address.
        @rtype: L{MMap}
        @param address: the address to obtain its mapping.
        @todo: symbolic address
        """
        return self.page2map[self._page(address)]

    # Permissions
    def getPermissions(self, address):
        """
        Returns the permissions of an address.
        @rtype: str
        @param address: the address to obtain its permissions.
        @todo: symbolic address
        """
        return self._getMap(address).perms

    def isValid(self, address):
        """
        Returns C{True} if C{address} is a valid mapped address,
        C{False} otherwise.
        @rtype: bool
        @param address: the address to check.
        @todo: symbolic address
        """
        return self._page(address) in self.page2map

    def isExecutable(self, address):
        """
        Returns C{True} if C{address} is executable, C{False} otherwise.
        @rtype: bool
        @param address: the address to check.
        @todo: symbolic address
        """
        return self.isValid(address) and self._getMap(address).isExecutable()

    def isWriteable(self, address):
        """
        Returns C{True} if C{address} is writable, C{False} otherwise.
        @rtype: bool
        @param address: the address to check.
        @todo: symbolic address
        """
        return self.isValid(address) and self._getMap(address).isWriteable()

    def isReadable(self, address):
        """
        Returns C{True} if C{address} is readable, C{False} otherwise.
        @rtype: bool
        @param address: the address to check.
        @todo: symbolic address
        """
        return self.isValid(address) and self._getMap(address).isReadable()

    # Write and read potentially symbolic bytes at symbolic indexes.
    def putchar(self, addr, data):
        """
        Memory based putchar implementation.
        @param addr: the address where to put the data.
        @param data: the character to put at this address of memory.
        @raise MemoryException: if the address is not mapped.
        @todo: check that addr is Writeable?
        """
        if not self.isValid(addr):
            raise MemoryException("Page Fault Writing", addr)
        m = self._getMap(addr)
        m.putchar(addr, data)

    def getchar(self, addr):
        """
        Memory based getchar implementation.
        @rtype: str[1]
        @param addr: the address where to obtain the character.
        @return: the character at the specified address.
        @raise MemoryException: if the address is not mapped.
        @todo: check that addr is Readable/Executable?
        """
        if not self.isValid(addr):
            raise MemoryException("Page Fault Reading", addr)
        # Concrete case: get the corresponding map.
        m = self._getMap(addr)
        return m.getchar(addr)

    # marshaling/pickle
    def __getstate__(self):
        state = {}
        state['addressbitsize'] = self.addressbitsize
        state['pagebitsize'] = self.pagebitsize
        state['maps'] = self.maps
        return state

    def __setstate__(self, state):
        self.addressbitsize = state['addressbitsize']
        self.pagebitsize = state['pagebitsize']
        self.maps = state['maps']
        # page2map is weak, so it is rebuilt from the maps on unpickling.
        self.page2map = WeakValueDictionary()
        for m in self.maps:
            for i in range(self._page(m.start), self._page(m.end)):
                self.page2map[i] = m
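# A usage sketch (Python 2, matching the snippet's idiom); MMapAnon and
# MemoryException are assumed from the surrounding module:

mem = Memory(addressbitsize=32, pagebitsize=12)

# Map one anonymous read/write/execute page; _search picks the final spot.
base = mem.mmap(0x10000000, 0x1000, 'rwx')
assert mem.isValid(base) and mem.isWriteable(base)

mem.putchar(base, 'A')
assert mem.getchar(base) == 'A'

mem.munmap(base, 0x1000)
assert not mem.isValid(base)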
# _logger and the default `serialization` adapter are assumed to come from
# the surrounding module.
from collections import namedtuple
from weakref import WeakKeyDictionary, WeakValueDictionary


class MessageFactory(object):
    """Class allowing to register new message types and pack/unpack them.

    :param s_adapter: :term:`serialization adapter`
        (default: None - library selected with :func:`.init`)
    """

    def __init__(self, s_adapter=None):
        self._message_names = {}                     # name -> message
        self._message_types = WeakValueDictionary()  # type_id -> message
        self._message_params = WeakKeyDictionary()   # message -> type_id, send kwargs
        if s_adapter is None:
            self.s_adapter = serialization
        else:
            self.s_adapter = s_adapter
        self._type_id_cnt = 0
        self._frozen = False
        self._hash = None

    def register(self, name, field_names=tuple(), **kwargs):
        """Register a new message type.

        :param name: name of message class
        :param field_names: list of names of message fields
        :param kwargs: additional keyword arguments for send method
        :return: message class (namedtuple)
        """
        if self._frozen:
            _logger.warning("Can't register new messages after connection "
                            "establishment")
        type_id = self._type_id_cnt = self._type_id_cnt + 1
        packet = namedtuple(name, field_names)
        self._message_names[name] = packet
        self._message_types[type_id] = packet
        self._message_params[packet] = (type_id, kwargs)
        return packet

    def pack(self, message):
        """Pack data to string.

        :param message: object of class created by register
        :return: string
        """
        type_id = self._message_params[message.__class__][0]
        message = (type_id,) + message
        data = self.s_adapter.pack(message)
        _logger.debug("Packing message (length: %d)", len(data))
        return data

    def set_frozen(self):
        """Disable the ability to register new messages, to allow generation
        of the hash.
        """
        self._frozen = True

    def reset_context(self, context):
        """Prepare an object to behave as a context for stream unpacking.

        :param context: object which will be prepared
        """
        context._unpacker = self.s_adapter.unpacker()

    def _process_message(self, message):
        try:
            type_id = message[0]
            return self._message_types[type_id](*message[1:])
        except KeyError:
            _logger.error('Unknown message type_id: %s', type_id)
        except Exception:
            _logger.error('Message unpacking error: %s', message)

    def unpack(self, data):
        """Unpack a message from a string.

        :param data: packed message data as a string
        :return: message
        """
        _logger.debug("Unpacking message (length: %d)", len(data))
        message = self.s_adapter.unpack(data)
        if message is not None:
            return self._process_message(message)
        _logger.error('Data corrupted')
        _logger.debug('Data: %r', data)

    def unpack_all(self, data, context):
        """Feed the unpacker with data from a stream and unpack all messages.

        :param data: packed message(s) data as a string
        :param context: object previously prepared with :meth:`reset_context`
        :return: iterator over messages
        """
        _logger.debug("Unpacking data (length: %d)", len(data))
        context._unpacker.feed(data)
        try:
            for message in context._unpacker:
                yield self._process_message(message)
        except Exception:
            _logger.error('Data corrupted')
            # Reset the unpacker to prevent corrupting the next data.
            self.reset_context(context)
            return

    def get_by_name(self, name):
        """Return the message class with the given name.

        :param name: name of message
        :return: message class (namedtuple)
        """
        return self._message_names[name]

    def get_by_type(self, type_id):
        """Return the message class with the given type_id.

        :param type_id: type identifier of message
        :return: message class (namedtuple)
        """
        return self._message_types[type_id]

    def get_params(self, message_cls):
        """Return a tuple containing the type_id and the sending keyword
        arguments.

        :param message_cls: message class created by register
        :return: int, dict
        """
        return self._message_params[message_cls]

    def get_hash(self):
        """Calculate and return the hash.

        The hash depends on the registered messages and the serializing
        library used.

        :return: int
        """
        if not self._frozen:
            _logger.warning('Attempt to get hash of not frozen MessageFactory')
            return None
        if self._hash is None:
            ids = sorted(self._message_types.keys())
            l = list()
            a = getattr(self.s_adapter, 'selected_adapter', self.s_adapter)
            l.append(a.__name__)
            for i in ids:
                p = self._message_types[i]
                l.append((i, p.__name__, p._fields))
            # should be the same on 32 & 64 bit platforms
            self._hash = hash(tuple(l)) & 0xffffffff
        return self._hash
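# A usage sketch with a toy JSON adapter standing in for the real
# serialization backend (which would also provide a streaming unpacker()
# for unpack_all):

import json


class JsonAdapter(object):
    """Illustrative adapter: pack/unpack only, no streaming support."""

    @staticmethod
    def pack(message):
        return json.dumps(message).encode()

    @staticmethod
    def unpack(data):
        return json.loads(data)


factory = MessageFactory(s_adapter=JsonAdapter)
Chat = factory.register('Chat', ('nick', 'text'))

data = factory.pack(Chat('alice', 'hello'))
assert factory.unpack(data) == Chat('alice', 'hello')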
class Memory(object):
    """
    The memory manager.
    This class handles all virtual memory mappings and symbolic chunks.
    """

    def __init__(self, addressbitsize=32, pagebitsize=12):
        """
        Builds a memory chunk.
        @param addressbitsize: size in bits of the address space (default=32).
        @param pagebitsize: size in bits of the page boundary (default=12).
        """
        assert addressbitsize in [16, 32, 64], "Unsupported address bit size"
        assert pagebitsize in [12, 13], "Unsupported page bit size"
        self.addressbitsize = addressbitsize
        self.pagebitsize = pagebitsize
        self.maps = set()
        self.page2map = WeakValueDictionary()  # {page -> ref{MAP}}

    def _ceil(self, address):
        """
        Returns the smallest page boundary value not less than the address.
        @rtype: int
        @param address: the address to calculate its ceil.
        @return: the ceil of C{address}.
        """
        pagemask = (1 << self.pagebitsize) - 1
        addrmask = (1 << self.addressbitsize) - 1
        return ((address | pagemask) + 1) & addrmask

    def _floor(self, address):
        """
        Returns the largest page boundary value not greater than the address.
        @rtype: int
        @param address: the address to calculate its floor.
        @return: the floor of C{address}.
        """
        pagemask = (1 << self.pagebitsize) - 1
        return address & ~pagemask

    def _page(self, address):
        """
        Calculates the page number of an address.
        @rtype: int
        @param address: the address to calculate its page number.
        @return: the page number of C{address}.
        """
        return address >> self.pagebitsize

    def _search(self, size, start=0x10000000, counter=0):
        """
        Recursively searches the address space for enough free space to allocate C{size} bytes.
        @rtype: int
        @param size: the size in bytes to allocate.
        @param start: an address from where to start the search.
        @param counter: internal parameter to know if all the memory was already scanned.
        @return: the address of an available space to map C{size} bytes.
        @raise MemoryException: if there is no space available to allocate the desired memory.
        @todo: Document what happens when you try to allocate something that wraps
               around the end of the 32/64 bit address representation.
        """
        if counter > 1 << self.addressbitsize:
            raise MemoryException("Not enough memory")
        # Alloc starting at the second page in case of overflow.
        if start + size > 1 << self.addressbitsize:
            start = 1 << self.pagebitsize
        for p in xrange(self._page(start), self._page(self._ceil(start + size - 1))):
            if p in self.page2map:
                return self._search(size,
                                    start=self.page2map[p].end,
                                    counter=counter + self.page2map[p].end - start)
        assert start + size <= (1 << self.addressbitsize)
        return start

    def mmapFile(self, addr, size, perms, filename, offset=0):
        """
        Creates a new file mapping in the memory address space.
        @rtype: int
        @param addr: the starting address (taken as a hint). If C{addr} is C{0} the first
                     big enough chunk of memory will be selected as starting address.
        @param size: the contents of a file mapping are initialized using C{size} bytes
                     starting at offset C{offset} in the file C{filename}.
        @param perms: the access permissions to this memory.
        @param filename: the pathname to the file to map.
        @param offset: the contents of a file mapping are initialized using C{size} bytes
                       starting at offset C{offset} in the file C{filename}.
        @return: the starting address where the file was mapped.
        @raise error:
                   - "Address shall be concrete" if C{addr} is not an integer number.
                   - "Address too big" if C{addr} goes beyond the limit of the memory.
                   - "Map already used" if the piece of memory starting in C{addr} and
                     with length C{size} isn't free.
        """
        # If addr is NULL, the system determines where to allocate the region.
        if addr is None:
            addr = 0x10000000
        assert type(addr) in [int, long], "Address shall be concrete"
        assert addr <= ((1 << self.addressbitsize) - 1), "Address too big"

        # The address is rounded down to the nearest multiple of the allocation granularity.
        addr = self._floor(addr)
        # The size value is rounded up to the next page boundary.
        size = self._ceil(size - 1)

        # Search for a free spot starting at the hint.
        addr = self._search(size, addr)

        # It should not be allocated yet.
        for i in xrange(self._page(addr), self._page(addr + size)):
            assert i not in self.page2map, "Map already used"

        # Create the file map.
        m = MMapFile(addr, size, perms, filename, offset,
                     addressbitsize=self.addressbitsize,
                     pagebitsize=self.pagebitsize)

        # Okay, ready to alloc.
        self.maps.add(m)

        # Update the page-to-map translation.
        for i in xrange(self._page(m.start), self._page(m.end)):
            self.page2map[i] = m
        return addr

    def mmap(self, addr, size, perms, data_init=None):
        """
        Creates a new mapping in the memory address space.
        @rtype: int
        @param addr: the starting address (taken as a hint). If C{addr} is C{0} the first
                     big enough chunk of memory will be selected as starting address.
        @param size: the length of the mapping.
        @param perms: the access permissions to this memory.
        @param data_init: optional data to initialize this memory.
        @return: the starting address where the memory was mapped.
        @raise error:
                   - "Address shall be concrete" if C{addr} is not an integer number.
                   - "Address too big" if C{addr} goes beyond the limit of the memory.
                   - "Map already used" if the piece of memory starting in C{addr} and
                     with length C{size} isn't free.
        """
        # If addr is NULL, the system determines where to allocate the region.
        if addr is None:
            addr = 0x10000000
        assert type(addr) in [int, long], "Address shall be concrete"
        assert addr <= ((1 << self.addressbitsize) - 1), "Address too big"

        # The address is rounded down to the nearest multiple of the allocation granularity.
        addr = self._floor(addr)
        # The size value is rounded up to the next page boundary.
        size = self._ceil(size - 1)

        # Search for a free spot starting at the hint.
        addr = self._search(size, addr)

        # It should not be allocated yet.
        for i in xrange(self._page(addr), self._page(addr + size)):
            assert i not in self.page2map, "Map already used"

        # Create the anonymous map.
        m = MMapAnon(start=addr, size=size, perms=perms, data_init=data_init,
                     addressbitsize=self.addressbitsize,
                     pagebitsize=self.pagebitsize)

        # Okay, ready to alloc.
        self.maps.add(m)

        # Update the page-to-map translation.
        for i in xrange(self._page(m.start), self._page(m.end)):
            self.page2map[i] = m

        logger.debug("New memory map @%x size:%x", addr, size)
        return addr

    def mappings(self):
        """
        Returns a sorted list of all the mappings for this memory.
        @rtype: list
        @return: a list of mappings.
        """
        result = []
        for m in self.maps:
            if isinstance(m, MMapAnon):
                result.append((m.start, m.end, m.perms, 0, ''))
            elif isinstance(m, MMapFile):
                result.append((m.start, m.end, m.perms, m.offset, m.filename))
            else:
                result.append((m.start, m.end, m.perms, 0, ''))
        return sorted(result)

    def __str__(self):
        return '\n'.join(["%016x-%016x % 4s %08x %s" % (start, end, p, offset, filename)
                          for start, end, p, offset, filename in self.mappings()])

    def munmap(self, start, size):
        """
        Deletes the mappings for the specified address range and causes further
        references to addresses within the range to generate invalid memory references.
        @param start: the starting address to delete.
        @param size: the length of the unmapping.
        """
        start = self._floor(start)
        size = self._ceil(size - 1)

        # Select all mappings that have at least one byte unmapped.
        affected = set()
        p = self._page(start)
        while p < self._page(self._ceil(start + size)):
            if p in self.page2map:
                m = self.page2map[p]
                affected.add(m)
                p = self._page(m.end)
            else:
                p += 1

        new_maps = []
        for m in affected:
            # Remove m's pages from page2map.
            for p in xrange(self._page(m.start), self._page(m.end)):
                del self.page2map[p]
            # Remove m from the maps set.
            self.maps.remove(m)
            # Unmap the range from m, possibly generating 0, 1 or 2 new maps.
            new_maps += m.unmap(start, size)

        # Reattach the newly generated maps (there may be none).
        for nm in new_maps:
            self.maps.add(nm)
            for p in xrange(self._page(nm.start), self._page(nm.end)):
                self.page2map[p] = nm
        logger.debug("Unmap memory @%x size:%x", start, size)

    def mprotect(self, start, size, perms):
        """
        Changes the access permissions to the memory mapped in the specified range.
        @param start: start range address.
        @param size: size of the range.
        @param perms: new permissions for the memory within the range.
        @todo: fix the failure mode: return True/False or raise an exception?
        @todo: check perms and what happens if they equal the existing perms.
        """
        start = self._floor(start)
        end = self._ceil(start + size - 1)
        size = end - start

        # Select all mappings that have at least one byte mprotected.
        affected = set()
        p = self._page(start)
        while p < self._page(end):
            if p in self.page2map:
                m = self.page2map[p]
                #if perms.replace(' ', '') != m.perms.replace(' ', ''):
                affected.add(m)
                p = self._page(m.end)
            else:
                p += 1

        new_maps = []
        for m in affected:
            # Remove all of m's pages from page2map.
            for p in xrange(self._page(m.start), self._page(m.end)):
                del self.page2map[p]
            # Remove m from the maps set.
            self.maps.remove(m)
            # Change perms on the range in m, possibly generating 0, 1 or 2 new maps.
            new_maps += m.mprotect(start, size, perms)

        # Reattach the newly generated maps (there may be none).
        for nm in new_maps:
            self.maps.add(nm)
            for p in xrange(self._page(nm.start), self._page(nm.end)):
                self.page2map[p] = nm
        logger.debug("Change perms to memory @%x size:%x newperms: %s", start, size, perms)

    def _getMap(self, address):
        """
        Returns the L{MMap} object containing the address.
        @rtype: L{MMap}
        @param address: the address to obtain its mapping.
        @todo: symbolic address
        """
        return self.page2map[self._page(address)]

    # Permissions
    def getPermissions(self, address):
        """
        Returns the permissions of an address.
        @rtype: str
        @param address: the address to obtain its permissions.
        @todo: symbolic address
        """
        return self._getMap(address).perms

    def isValid(self, address):
        """
        Returns C{True} if C{address} is a valid mapped address.
        @rtype: bool
        @param address: the address to know if it is valid or not.
        @return:
                - C{True} if the address is a valid mapped address.
                - C{False} if the address is not a valid mapped address.
        @todo: symbolic address
        """
        return self._page(address) in self.page2map

    def isExecutable(self, address):
        """
        Returns C{True} if C{address} is executable.
        @rtype: bool
        @param address: the address to know if it is executable or not.
        @return:
                - C{True} if the address is executable.
                - C{False} if the address is not executable.
        @todo: symbolic address
        """
        return self.isValid(address) and self._getMap(address).isExecutable()

    def isWriteable(self, address):
        """
        Returns C{True} if C{address} is writable.
        @rtype: bool
        @param address: the address to know if it is writable or not.
        @return:
                - C{True} if the address is writable.
                - C{False} if the address is not writable.
        @todo: symbolic address
        """
        return self.isValid(address) and self._getMap(address).isWriteable()

    def isReadable(self, address):
        """
        Returns C{True} if C{address} is readable.
        @rtype: bool
        @param address: the address to know if it is readable or not.
        @return:
                - C{True} if the address is readable.
                - C{False} if the address is not readable.
        @todo: symbolic address
        """
        return self.isValid(address) and self._getMap(address).isReadable()

    # write and read potentially symbolic bytes at symbolic indexes
    def putchar(self, addr, data):
        """
        Memory based putchar implementation.
        @param addr: the address where to put the data.
        @param data: character to put in this address of memory.
        @raise MemoryException: if the address is not mapped.
        @todo: what if addr is only readable/executable?
        """
        if not self.isValid(addr):
            raise MemoryException("Page Fault Writing", addr)
        m = self._getMap(addr)
        m.putchar(addr, data)

    def getchar(self, addr):
        """
        Memory based getchar implementation.
        @rtype: str[1]
        @param addr: the address where to obtain the character.
        @return: the character at the specified address.
        @raise MemoryException: if the address is not mapped.
        @todo: what if addr is only writable/executable?
        """
        if not self.isValid(addr):
            raise MemoryException("Page Fault Reading", addr)
        # Concrete case: get the corresponding map.
        m = self._getMap(addr)
        return m.getchar(addr)

    # marshaling/pickle
    def __getstate__(self):
        state = {}
        state['addressbitsize'] = self.addressbitsize
        state['pagebitsize'] = self.pagebitsize
        state['maps'] = self.maps
        return state

    def __setstate__(self, state):
        self.addressbitsize = state['addressbitsize']
        self.pagebitsize = state['pagebitsize']
        self.maps = state['maps']
        self.page2map = WeakValueDictionary()
        for m in self.maps:
            for i in xrange(self._page(m.start), self._page(m.end)):
                self.page2map[i] = m
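# A minimal, self-contained sketch of the page-boundary arithmetic that
# Memory._floor/_ceil/_page above implement, assuming the default 32-bit
# address space with 2**12-byte pages. The helper names (page_floor, page_ceil,
# page_number) are hypothetical and exist only for this illustration.

PAGE_BITS = 12
ADDR_BITS = 32
PAGE_MASK = (1 << PAGE_BITS) - 1
ADDR_MASK = (1 << ADDR_BITS) - 1


def page_floor(address):
    # Clear the low bits: the largest page boundary <= address.
    return address & ~PAGE_MASK


def page_ceil(address):
    # Set the low bits and add one: this always moves to the *next* boundary,
    # even for an already-aligned address, which is why the manager above
    # calls _ceil(size - 1) when rounding sizes up.
    return ((address | PAGE_MASK) + 1) & ADDR_MASK


def page_number(address):
    # The page index is the address shifted right by the page bits.
    return address >> PAGE_BITS


assert page_floor(0x1234) == 0x1000
assert page_ceil(0x0FFF) == 0x1000
assert page_ceil(0x1000) == 0x2000  # an aligned input still rounds up
assert page_number(0x3FFF) == 3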
class Memory(object, metaclass=ABCMeta):
    """
    The memory manager.
    This class handles all virtual memory mappings and symbolic chunks.
    """

    def __init__(self, maps=None, cpu=StubCPU()):
        """
        Builds a memory manager.
        """
        super().__init__()
        if maps is None:
            self._maps = set()
        else:
            self._maps = set(maps)
        self.cpu = cpu
        self._page2map = WeakValueDictionary()  # {page -> ref{MAP}}
        self._recording_stack = []
        for m in self._maps:
            for i in range(self._page(m.start), self._page(m.end)):
                assert i not in self._page2map
                self._page2map[i] = m

    def __reduce__(self):
        return (self.__class__, (self._maps, self.cpu))

    @property
    @abstractmethod
    def memory_bit_size(self):
        return 32

    @property
    @abstractmethod
    def page_bit_size(self):
        return 12

    @property
    def memory_size(self):
        return 1 << self.memory_bit_size

    @property
    def page_size(self):
        return 1 << self.page_bit_size

    @property
    def memory_mask(self):
        return self.memory_size - 1

    @property
    def page_mask(self):
        return self.page_size - 1

    @property
    def maps(self):
        return self._maps

    def _ceil(self, address):
        """
        Returns the smallest page boundary value not less than the address.

        :param address: the address to calculate its ceil.
        :return: the ceil of C{address}.
        :rtype: int
        """
        return (((address - 1) + self.page_size) & ~self.page_mask) & self.memory_mask

    def _floor(self, address):
        """
        Returns the largest page boundary value not greater than the address.

        :param address: the address to calculate its floor.
        :return: the floor of C{address}.
        :rtype: int
        """
        return address & ~self.page_mask

    def _page(self, address):
        """
        Calculates the page number of an address.

        :param address: the address to calculate its page number.
        :return: the page number of the address.
        :rtype: int
        """
        return address >> self.page_bit_size

    def _search(self, size, start=None, counter=0):
        """
        Recursively searches the address space for enough free space to allocate C{size} bytes.

        :param size: the size in bytes to allocate.
        :param start: an address from where to start the search.
        :param counter: internal parameter to know if all the memory was already scanned.
        :return: the address of an available space to map C{size} bytes.
        :raises MemoryException: if there is no space available to allocate the desired memory.
        :rtype: int

        todo: Document what happens when you try to allocate something that wraps
              around the end of the 32/64 bit address representation.
        """
        assert size & self.page_mask == 0
        if start is None:
            end = {32: 0xF8000000, 64: 0x0000800000000000}[self.memory_bit_size]
            start = end - size
        else:
            if start > self.memory_size - size:
                start = self.memory_size - size
            end = start + size

        consecutive_free = 0
        for p in range(self._page(end - 1), -1, -1):
            if p not in self._page2map:
                consecutive_free += 0x1000
            else:
                consecutive_free = 0
            if consecutive_free >= size:
                return p << self.page_bit_size
        counter += 1
        if counter >= self.memory_size // self.page_size:
            raise MemoryException("Not enough memory")
        return self._search(size, self.memory_size - size, counter)

    def mmapFile(self, addr, size, perms, filename, offset=0):
        """
        Creates a new file mapping in the memory address space.

        :param addr: the starting address (taken as a hint). If C{addr} is C{0} the first
                     big enough chunk of memory will be selected as starting address.
        :param size: the contents of a file mapping are initialized using C{size} bytes
                     starting at offset C{offset} in the file C{filename}.
        :param perms: the access permissions to this memory.
        :param filename: the pathname to the file to map.
        :param offset: the contents of a file mapping are initialized using C{size} bytes
                       starting at offset C{offset} in the file C{filename}.
        :return: the starting address where the file was mapped.
        :rtype: int
        :raises error:
                   - 'Address shall be concrete' if C{addr} is not an integer number.
                   - 'Address too big' if C{addr} goes beyond the limit of the memory.
                   - 'Map already used' if the piece of memory starting in C{addr} and
                     with length C{size} isn't free.
        """
        # If addr is NULL, the system determines where to allocate the region.
        assert addr is None or isinstance(addr, int), "Address shall be concrete"
        assert size > 0

        self.cpu._publish("will_map_memory", addr, size, perms, filename, offset)

        # The address is rounded down to the nearest multiple of the allocation granularity.
        if addr is not None:
            assert addr < self.memory_size, "Address too big"
            addr = self._floor(addr)

        # The size value is rounded up to the next page boundary.
        size = self._ceil(size)

        # If no hint was given, search for a spot.
        addr = self._search(size, addr)

        # It should not be allocated yet.
        for i in range(self._page(addr), self._page(addr + size)):
            assert i not in self._page2map, "Map already used"

        # Create the file map.
        m = FileMap(addr, size, perms, filename, offset)

        # Okay, ready to alloc.
        self._add(m)

        logger.debug("New file-memory map @%x size:%x", addr, size)
        self.cpu._publish("did_map_memory", addr, size, perms, filename, offset, addr)
        return addr

    def mmap(self, addr, size, perms, data_init=None, name=None):
        """
        Creates a new mapping in the memory address space.

        :param addr: the starting address (taken as a hint). If C{addr} is C{0} the first
                     big enough chunk of memory will be selected as starting address.
        :param size: the length of the mapping.
        :param perms: the access permissions to this memory.
        :param data_init: optional data to initialize this memory.
        :param name: optional name to give to this mapping
        :return: the starting address where the memory was mapped.
        :rtype: int
        :raises error:
                   - 'Address shall be concrete' if C{addr} is not an integer number.
                   - 'Address too big' if C{addr} goes beyond the limit of the memory.
                   - 'Map already used' if the piece of memory starting in C{addr} and
                     with length C{size} isn't free.
        """
        # If addr is NULL, the system determines where to allocate the region.
        assert addr is None or isinstance(addr, int), "Address shall be concrete"

        self.cpu._publish("will_map_memory", addr, size, perms, None, None)

        # The address is rounded down to the nearest multiple of the allocation granularity.
        if addr is not None:
            assert addr < self.memory_size, "Address too big"
            addr = self._floor(addr)

        # The size value is rounded up to the next page boundary.
        size = self._ceil(size)

        # If no hint was given, search for a spot.
        addr = self._search(size, addr)

        # It should not be allocated yet.
        for i in range(self._page(addr), self._page(addr + size)):
            assert i not in self._page2map, "Map already used"

        # Create the anonymous map.
        m = AnonMap(start=addr, size=size, perms=perms, data_init=data_init, name=name)

        # Okay, ready to alloc.
        self._add(m)

        logger.debug("New memory map @%x size:%x", addr, size)
        self.cpu._publish("did_map_memory", addr, size, perms, None, None, addr)
        return addr

    def _add(self, m):
        assert isinstance(m, Map)
        assert m not in self._maps
        assert m.start & self.page_mask == 0
        assert m.end & self.page_mask == 0
        self._maps.add(m)
        # update the page-to-map translation
        for i in range(self._page(m.start), self._page(m.end)):
            self._page2map[i] = m

    def _del(self, m):
        assert isinstance(m, Map)
        assert m in self._maps
        # remove m's pages from page2map
        for p in range(self._page(m.start), self._page(m.end)):
            del self._page2map[p]
        # remove m from the maps set
        self._maps.remove(m)

    def map_containing(self, address):
        """
        Returns the L{MMap} object containing the address.

        :param address: the address to obtain its mapping.
        :rtype: L{MMap}

        @todo: symbolic address
        """
        page_offset = self._page(address)
        if page_offset not in self._page2map:
            raise MemoryException("Page not mapped", address)
        return self._page2map[page_offset]

    def mappings(self):
        """
        Returns a sorted list of all the mappings for this memory.

        :return: a list of mappings.
        :rtype: list
        """
        result = []
        for m in self.maps:
            if isinstance(m, AnonMap):
                result.append((m.start, m.end, m.perms, 0, ""))
            elif isinstance(m, FileMap):
                result.append((m.start, m.end, m.perms, m._offset, m._filename))
            else:
                result.append((m.start, m.end, m.perms, 0, m.name))
        return sorted(result)

    def __str__(self):
        return "\n".join(
            [
                f'{start:016x}-{end:016x} {p:>4s} {offset:08x} {name or ""}'
                for start, end, p, offset, name in self.mappings()
            ]
        )

    def _maps_in_range(self, start, end):
        """
        Generates the list of maps that overlap with the range [start:end]
        """
        # Search for the first matching map
        addr = start
        while addr < end:
            if addr not in self:
                addr += self.page_size
            else:
                m = self._page2map[self._page(addr)]
                yield m
                addr = m.end

    def munmap(self, start, size):
        """
        Deletes the mappings for the specified address range and causes further
        references to addresses within the range to generate invalid memory references.

        :param start: the starting address to delete.
        :param size: the length of the unmapping.
        """
        start = self._floor(start)
        end = self._ceil(start + size)

        self.cpu._publish("will_unmap_memory", start, size)

        for m in self._maps_in_range(start, end):
            self._del(m)
            head, tail = m.split(start)
            middle, tail = tail.split(end)
            assert middle is not None
            if head:
                self._add(head)
            if tail:
                self._add(tail)

        self.cpu._publish("did_unmap_memory", start, size)
        logger.debug(f"Unmap memory @{start:x} size:{size:x}")

    def mprotect(self, start, size, perms):
        assert size > 0
        start = self._floor(start)
        end = self._ceil(start + size)

        self.cpu._publish("will_protect_memory", start, size, perms)

        for m in self._maps_in_range(start, end):
            self._del(m)
            head, tail = m.split(start)
            middle, tail = tail.split(end)
            assert middle is not None
            middle.perms = perms
            self._add(middle)
            if head:
                self._add(head)
            if tail:
                self._add(tail)

        self.cpu._publish("did_protect_memory", start, size, perms)

    # Permissions
    def __contains__(self, address):
        return self._page(address) in self._page2map

    def perms(self, index):
        # not happy with this interface.
        if isinstance(index, slice):
            # get the most restrictive set of perms for the range
            raise NotImplementedError("No perms for slices")
        else:
            return self.map_containing(index).perms

    def access_ok(self, index, access, force=False):
        if isinstance(index, slice):
            assert index.stop - index.start >= 0
            addr = index.start
            while addr < index.stop:
                if addr not in self:
                    return False
                m = self.map_containing(addr)
                if not force and not m.access_ok(access):
                    return False
                until_next_page = min(m.end - addr, index.stop - addr)
                addr += until_next_page
            assert addr == index.stop
            return True
        else:
            if index not in self:
                return False
            m = self.map_containing(index)
            return force or m.access_ok(access)

    # write and read potentially symbolic bytes at symbolic indexes
    def read(self, addr, size, force=False):
        if not self.access_ok(slice(addr, addr + size), "r", force):
            raise InvalidMemoryAccess(addr, "r")

        assert size > 0
        result = []
        stop = addr + size
        p = addr
        while p < stop:
            m = self.map_containing(p)
            _size = min(m.end - p, stop - p)
            result += m[p : p + _size]
            p += _size
        assert p == stop
        return result

    def push_record_writes(self):
        """
        Begin recording all writes. Retrieve all writes with `pop_record_writes()`.
        """
        self._recording_stack.append([])

    def pop_record_writes(self):
        """
        Stop recording, and return a `list[(address, value)]` of all the writes that
        occurred while recording, where `value` is of type list[str].

        `push_record_writes()` may be called again before popping; nested recordings
        are merged into the enclosing trace. For example::

            mem.push_record_writes()
                mem.write(1, 'a')
                mem.push_record_writes()
                    mem.write(2, 'b')
                mem.pop_record_writes()  # Will return [(2, 'b')]
            mem.pop_record_writes()  # Will return [(1, 'a'), (2, 'b')]

        Multiple writes to the same address will all be included in the trace in the
        same order they occurred.

        :return: list[tuple]
        """
        lst = self._recording_stack.pop()
        # Append the current list to a previously-started trace.
        if self._recording_stack:
            self._recording_stack[-1].extend(lst)
        return lst

    def write(self, addr, buf, force=False):
        size = len(buf)
        if not self.access_ok(slice(addr, addr + size), "w", force):
            raise InvalidMemoryAccess(addr, "w")
        assert size > 0
        stop = addr + size
        start = addr

        if self._recording_stack:
            self._recording_stack[-1].append((addr, buf))

        while addr < stop:
            m = self.map_containing(addr)
            size = min(m.end - addr, stop - addr)
            m[addr : addr + size] = buf[addr - start : addr - start + size]
            addr += size
        assert addr == stop

    def _get_size(self, size):
        return size

    def __setitem__(self, index, value):
        if isinstance(index, slice):
            size = self._get_size(index.stop - index.start)
            assert len(value) == size  # raise a proper error?
            self.write(index.start, value)
        else:
            self.write(index, (value,))

    def __getitem__(self, index):
        if isinstance(index, slice):
            result = self.read(index.start, index.stop - index.start)
        else:
            result = self.read(index, 1)[0]
        return result

    def __iter__(self):
        """
        Iterate over all valid addresses.
        """
        for page_addr in sorted(self._page2map.keys()):
            start = page_addr * self.page_size
            end = start + self.page_size
            for addr in range(start, end):
                yield addr
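# A short, self-contained sketch (not part of the class above) of why the
# page-to-map table can be a WeakValueDictionary: entries stay alive only as
# long as something, here a plain set standing in for the manager's _maps,
# holds a strong reference to the map. FakeMap is a hypothetical stand-in; the
# final assert relies on CPython's reference counting reclaiming the object
# immediately, so on other interpreters the entries may linger until a GC pass.

from weakref import WeakValueDictionary


class FakeMap(object):
    def __init__(self, start, end):
        self.start, self.end = start, end


strong_maps = set()
page2map = WeakValueDictionary()

m = FakeMap(0x1000, 0x3000)
strong_maps.add(m)
for page in range(m.start >> 12, m.end >> 12):
    page2map[page] = m
assert sorted(page2map.keys()) == [1, 2]

strong_maps.discard(m)
del m  # drop the last strong reference; the page entries vanish with it
assert list(page2map.keys()) == []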
class PersistentDict(object):
    """
    Mapping object that is persistently stored

    :param store_uri: URI for storing buckets; see :py:class:`~BaseBucketStore`
    :type store_uri: :py:class:`str`
    :param bucket_count: number of buckets to use for storing data
    :type bucket_count: :py:class:`int`
    :param bucket_salt: salt for finding buckets to store data
    :type bucket_salt: :py:class:`int`
    :param cache_size: number of buckets to LRU-cache in memory
    :type cache_size: :py:class:`int`
    :param cache_keys: whether to cache all keys in memory
    :type cache_keys: :py:class:`bool`
    """
    persistent_defaults = {
        'bucket_count': 32,
        'bucket_salt': 0,
    }

    def __init__(self, store_uri, bucket_count=NOTSET, bucket_salt=NOTSET,
                 cache_size=3, cache_keys=True):
        self._bucket_store = BaseBucketStore.from_uri(store_uri=store_uri, default_scheme='file')
        # set empty fields
        self._bucket_count = None
        self._bucket_salt = None
        self._bucket_keys = set()
        self.bucket_key_fmt = None
        self._keys_cache = None
        self._bucket_cache = None
        self._cache_size = None
        # load current settings
        try:
            for attr, value in self._bucket_store.fetch_head().items():
                setattr(self, attr, value)
            self._update_bucket_key_fmt()
        except BucketNotFound:
            pass
        # apply new settings
        self.bucket_count = bucket_count
        self.bucket_salt = bucket_salt
        # LRU store for objects fetched from disk
        self.cache_size = cache_size
        # weakref store for objects still in use
        self._active_buckets = WeakValueDictionary()
        self._active_items = WeakValueDictionary()
        # store new settings
        self._store_head()
        # cache keys in memory
        self.cache_keys = cache_keys

    @property
    def store_uri(self):
        return self._bucket_store.store_uri

    # Settings
    def _store_head(self):
        """
        Store the meta-information of the dict
        """
        self._bucket_store.store_head({
            attr: getattr(self, attr) for attr in
            # work directly on internal values; setters are called as part of init for finalization
            ('_bucket_count', '_bucket_salt', '_bucket_keys')
        })

    def _bucket_fmt_digits(self, bucket_count=None):
        """Return the number of hex digits required for the bucket name"""
        bucket_count = bucket_count or self._bucket_count
        return max(int(math.ceil(math.log(bucket_count, 16))), 1)

    # exposed settings
    @property
    def cache_size(self):
        return self._cache_size

    @cache_size.setter
    def cache_size(self, value):
        self._cache_size = int(value or 1)
        self._bucket_cache = deque(maxlen=self.cache_size)

    @property
    def bucket_salt(self):
        """
        Get/Set the ``bucket_salt`` of the persistent mapping

        :note: Setting ``bucket_salt`` causes **all** buckets storing data to be
               recreated. Until the new buckets have been created, changes to the
               mapping content may be silently dropped.
        """
        return self._bucket_salt

    @bucket_salt.setter
    def bucket_salt(self, value):
        # default if unset
        if value == NOTSET:
            if self._bucket_salt is not None:
                return
            self._bucket_salt = self.persistent_defaults['bucket_salt']
        else:
            value = int(value)
            # no change
            if self._bucket_salt == value:
                return
            # uninitialized, we don't have content yet
            elif self._bucket_salt is None:
                self._bucket_salt = value
            # TODO: allow resalting backend
            else:
                raise NotImplementedError('Changing bucket salt not implemented yet')
        self._update_bucket_key_fmt()

    @property
    def bucket_count(self):
        """
        Get/Set the ``bucket_count`` of the persistent mapping

        :note: Setting ``bucket_count`` causes **all** buckets storing data to be
               recreated. Until the new buckets have been created, changes to the
               mapping content may be silently dropped.
        """
        return self._bucket_count

    @bucket_count.setter
    def bucket_count(self, value):
        # default if unset
        if value == NOTSET:
            if self._bucket_count is not None:
                return
            self._bucket_count = self.persistent_defaults['bucket_count']
        else:
            value = int(value)
            if value < 1:
                raise ValueError('At least one bucket must be used')
            # no change
            elif self._bucket_count == value:
                return
            # uninitialized, we don't have content yet
            elif self._bucket_count is None:
                self._bucket_count = value
            # TODO: allow resizing backend
            else:
                raise NotImplementedError('Changing bucket count not implemented yet')
        # apply secondary settings
        self._update_bucket_key_fmt()

    @property
    def cache_keys(self):
        return self._keys_cache is not None

    @cache_keys.setter
    def cache_keys(self, value):
        if value and self._keys_cache is None:
            # switch on
            self._keys_cache = set(self.keys())
        elif not value and self._keys_cache is not None:
            # switch off
            self._keys_cache = None

    def _update_bucket_key_fmt(self):
        # key: count, salt, index
        self.bucket_key_fmt = "pdictbkt_%(bucket_count)x%(bucket_salt)s%%0%(index_digits)dx" % {
            'bucket_count': self.bucket_count,
            'bucket_salt': HASHKEY_HEXFMT % hashkey(self.bucket_salt, self.bucket_salt),
            'index_digits': self._bucket_fmt_digits(),
        }

    # bucket management
    def _bucket_key(self, key):
        """
        Create the bucket identifier for a given key

        :param key: key to the content in-memory
        :return: key to the bucket stored persistently
        :rtype: str
        """
        return self.bucket_key_fmt % (hashkey(key) % self._bucket_count)

    def _fetch_bucket(self, bucket_key):
        """
        Return a bucket from disk or create a new one

        :param bucket_key: key for the bucket
        :return: bucket for ``bucket_key``
        :rtype: :py:class:`~DictBucket`
        """
        try:
            bucket = self._bucket_store.fetch_bucket(bucket_key=bucket_key)
        except BucketNotFound:
            bucket = DictBucket()
        self._active_buckets[bucket_key] = bucket
        self._bucket_cache.appendleft(bucket)
        return bucket

    def _get_bucket(self, bucket_key):
        """
        Return the appropriate bucket

        May return the cached bucket if available.

        :param bucket_key: key for the bucket
        :return: bucket for ``bucket_key``
        :rtype: :py:class:`~DictBucket`
        """
        try:
            return self._active_buckets[bucket_key]
        except KeyError:
            return self._fetch_bucket(bucket_key)

    def _store_bucket(self, bucket_key, bucket=None):
        """
        Store a bucket on disk

        :param bucket_key: key for the entire bucket
        """
        if bucket is None:
            try:
                bucket = self._active_buckets[bucket_key]
            except KeyError:
                return
        if bucket:
            self._bucket_store.store_bucket(bucket_key=bucket_key, bucket=bucket)
            self._add_bucket_key(bucket_key)
        # free empty buckets
        else:
            self._bucket_store.free_bucket(bucket_key)
            self._discard_bucket_key(bucket_key)

    def _add_bucket_key(self, bucket_key):
        if bucket_key not in self._bucket_keys:
            self._bucket_keys.add(bucket_key)
            self._store_head()

    def _discard_bucket_key(self, bucket_key):
        if bucket_key in self._bucket_keys:
            self._bucket_keys.remove(bucket_key)
            self._store_head()

    # cache management
    # Item cache
    def _set_cached_item(self, key, item):
        """Cache a reference to an existing item"""
        try:
            self._active_items[key] = item
        except TypeError:
            pass

    def _get_cached_item(self, key):
        """Get a reference to an existing item; raises KeyError if the item cannot be fetched"""
        try:
            return self._active_items[key]
        except TypeError:
            raise KeyError

    def _del_cached_item(self, key):
        """Release the reference to an existing item"""
        try:
            del self._active_items[key]
        except (TypeError, KeyError):
            pass

    # paths and files
    def flush(self):
        """
        Commit all outstanding changes to the persistent store
        """
        for bucket_key, bucket in self._active_buckets.items():
            self._store_bucket(bucket_key, bucket)

    # dictionary interface
    def __getitem__(self, key):
        # - use cached reference to existing item
        # - fetch item from cached reference to existing bucket
        # - fetch item from fetched bucket
        try:
            return self._get_cached_item(key)
        except KeyError:
            bucket = self._get_bucket(self._bucket_key(key))
            item = bucket[key]
        self._set_cached_item(key, item)
        return item

    def __setitem__(self, key, value):
        bucket_key = self._bucket_key(key)
        bucket = self._get_bucket(bucket_key)
        bucket[key] = value
        self._store_bucket(bucket_key, bucket)
        if self._keys_cache is not None:
            self._keys_cache.add(key)
        # update item cache
        self._set_cached_item(key, value)

    def __delitem__(self, key):
        bucket_key = self._bucket_key(key)
        bucket = self._get_bucket(bucket_key)
        del bucket[key]
        self._store_bucket(bucket_key)
        if self._keys_cache is not None:
            self._keys_cache.discard(key)
        self._del_cached_item(key)

    # container protocol
    def __contains__(self, key):
        if self._keys_cache is not None:
            return key in self._keys_cache
        elif key in self._active_items:
            return True
        else:
            bucket = self._get_bucket(self._bucket_key(key))
            return key in bucket

    def __len__(self):
        # try cached
        if self._keys_cache is not None:
            return len(self._keys_cache)
        # count each bucket, see 'keys' for the iteration scheme
        read_buckets, length = set(), 0
        # start with the buckets we have in memory
        for bucket_key in self._active_buckets.keys():
            length += len(self._active_buckets[bucket_key])
            read_buckets.add(bucket_key)
        # pull in the remaining buckets
        for bucket_key in self._bucket_keys:
            if bucket_key not in read_buckets:
                length += len(self._fetch_bucket(bucket_key))
                read_buckets.add(bucket_key)
        return length

    def __bool__(self):
        # can only have items if we have buckets
        return bool(self._bucket_keys)

    __nonzero__ = __bool__

    def __eq__(self, other):
        # other is a pdict, try some fast comparisons
        if isinstance(other, PersistentDict):
            # we are the same store
            if (
                self._bucket_store == other._bucket_store
                and self.bucket_count == other.bucket_count
                and self.bucket_salt == other.bucket_salt
            ):
                return True
            # different keys, cannot be equal
            if self._keys_cache is not None and self._keys_cache != other._keys_cache:
                return False
        # not a mapping, cannot be equal
        elif not isinstance(other, abc.Mapping):
            return False
        # no fast path resolved...
        # try a not-quite-slow path
        if len(self) // self.bucket_count <= self.cache_size:
            # we're probably in memory already, just rewrap the content
            return self.copy() == other
        return all(other[key] == value for key, value in self.items())

    def __ne__(self, other):
        return not self == other

    def __iter__(self):
        """:see: :py:meth:`~.PersistentDict.keys`"""
        read_buckets = set()
        # start with the buckets we have in memory
        for bucket_key in self._active_buckets.keys():
            for item_key in self._active_buckets[bucket_key].keys():
                yield item_key
            read_buckets.add(bucket_key)
        # pull in all remaining buckets
        for bucket_key in self._bucket_keys:
            if bucket_key not in read_buckets:
                bucket = self._fetch_bucket(bucket_key)
                for item_key in bucket.keys():
                    yield item_key
                read_buckets.add(bucket_key)

    # dictionary methods
    def get(self, key, default=None):
        """
        Return the value for ``key`` if ``key`` is in the dictionary, else ``default``.

        Since ``default`` defaults to ``None``, this method never raises a
        :py:exc:`KeyError`.

        :param key: key to an item in the dictionary
        :param default: default to return if no item exists
        """
        try:
            return self[key]
        except KeyError:
            return default

    def pop(self, key, default=NOTSET):
        """
        If ``key`` is in the dictionary, remove it and return its value, else return
        ``default``. If ``default`` is not given and ``key`` is not in the dictionary,
        a KeyError is raised.

        :param key: key to an item in the dictionary
        :param default: default to return if no item exists
        :raises KeyError: if no item exists and no default is given
        """
        try:
            item = self[key]
            del self[key]
        except KeyError:
            if default is NOTSET:
                raise
            item = default
        return item

    def popitem(self):
        """
        Remove and return an arbitrary ``(key, value)`` pair from the dictionary.

        popitem() is useful to destructively iterate over a dictionary, as often used
        in set algorithms. If the dictionary is empty, calling popitem() raises a
        KeyError.

        :raises KeyError: if the dictionary is empty
        """
        try:
            key = next(iter(self))
        except StopIteration:
            raise KeyError
        else:
            return key, self.pop(key)

    def setdefault(self, key, default=None):
        """
        If ``key`` is in the dictionary, return its value. If not, insert ``key`` with a
        value of ``default`` and return ``default``. ``default`` defaults to ``None``.

        :param key: key to an item in the dictionary
        :param default: default to insert and return if no item exists
        """
        try:
            return self[key]
        except KeyError:
            self[key] = default
            return default

    def clear(self):
        """Remove all items from the dictionary."""
        # clear persistent storage
        for bucket_key in self._bucket_keys:
            self._bucket_store.free_bucket(bucket_key=bucket_key)
        self._bucket_keys = type(self._bucket_keys)()
        self._store_head()
        # reset caches
        self._bucket_cache = deque(maxlen=self.cache_size)
        self._active_buckets = type(self._active_buckets)()
        self._active_items = type(self._active_items)()
        self._keys_cache = None if self._keys_cache is None else type(self._keys_cache)()

    def update(self, other=None, **kwargs):
        """
        Update the dictionary with the ``(key, value)`` pairs from ``other``,
        overwriting existing keys.

        :py:meth:`~.PersistentDict.update` accepts either another dictionary object
        or an iterable of ``(key, value)`` pairs (as tuples or other iterables of
        length two). If keyword arguments are specified, the dictionary is then
        updated with those ``(key, value)`` pairs: ``d.update(red=1, blue=2)``.

        :param other: mapping or iterable of ``(key, value)`` pairs
        :param kwargs: ``key=value`` arguments to insert
        :return: None

        :note: This function is faster for large collections as changes are made per
               bucket, not per item. The drawback is larger memory consumption, as
               the entire input is sorted in memory.
        """
        def updatebuckets(key_values):
            """
            Commit entire buckets from key, value pairs

            :param key_values: iterable of ``(key, value)`` pairs
            """
            # sort kvs by bucket
            key_values = sorted(key_values, key=lambda key_val: self._bucket_key(key_val[0]))
            # insert kvs by bucket
            last_bucket_key, bucket = None, None
            for key, value in key_values:
                bucket_key = self._bucket_key(key)
                # cycle to the next bucket if the current one is done
                if bucket_key != last_bucket_key:
                    if last_bucket_key is not None:
                        self._store_bucket(last_bucket_key)
                    last_bucket_key = bucket_key
                    bucket = self._get_bucket(bucket_key)
                # update bucket
                bucket[key] = value
                # update caches
                if self._keys_cache is not None:
                    self._keys_cache.add(key)
                self._set_cached_item(key, value)
            # commit the outstanding bucket, if any
            if last_bucket_key is not None:
                self._store_bucket(last_bucket_key)

        if other is not None:
            # mapping types
            if hasattr(other, "items"):  # dictionary
                updatebuckets(other.items())
            elif hasattr(other, "keys"):  # partial dictionary
                updatebuckets((key, other[key]) for key in other.keys())
            elif isinstance(other, abc.Mapping):
                updatebuckets((key, other[key]) for key in other)
            else:  # sequence
                updatebuckets(other)
        updatebuckets(kwargs.items())

    # iterations
    def keys(self):
        """
        :__doc__:

        If ``d.cache_keys == True``, the view provides keys without access to the
        persistent backend, but in arbitrary order. This is likely to jump between
        persistent buckets.

        If ``d.cache_keys == False``, iteration is aligned to buckets - this is only
        an implementation detail and may change in the future. If you need aligned
        iteration, use ``for key in d`` or directly access :py:meth:`.items`.

        :note: See the note on iterator equivalency for :py:meth:`~.PersistentDict.items`.
        """
        return PersistentDictKeysView(self)

    def items(self):
        """
        :__doc__:

        This iterates over all keys in a semi-deterministic way. First, all keys from
        buckets cached in memory are returned. Following this, keys from the remaining
        buckets are returned.

        :note: Due to aligning keys to buckets, this function does not benefit from
               ``d.cache_keys == True``.

        :note: Since the state of the mapping also depends on accesses, the strict
               guarantee for iteration sequence equivalence given by ``dict`` is not
               replicated. Thus, it cannot be assumed that
               ``d.items() == zip(d.values(), d.keys()) == [(v, k) for (k, v) in d]``
               holds true in any case.
        """
        return PersistentDictItemsView(self)

    def values(self):
        """
        :__doc__:

        :note: See the note on iterator equivalency for :py:meth:`~.PersistentDict.items`.
        """
        return PersistentDictValuesView(self)

    # high level operations
    def copy(self):
        """
        :__doc__:

        :note: This will return a ``dict``, not a :py:class:`~.PersistentDict`.
        """
        return dict(self.items())

    def __repr__(self):
        return "%s(bucket_store=%r, bucket_count=%r, cache_size=%r, cache_keys=%r, items={%s})" % (
            self.__class__.__name__,
            self._bucket_store,
            self.bucket_count,
            self.cache_size,
            self._keys_cache is not None,
            self.__repr_content(),
        )

    def __repr_content(self):  # pragma: no cover
        reprs = []
        read_keys = set()
        for bucket_key in self._active_buckets.keys():
            try:
                bucket = self._active_buckets[bucket_key]
                if not bucket:
                    continue
                reprs.append(repr(bucket)[1:-1])
                read_keys.update(bucket.keys())
            except KeyError:
                pass
        if self._keys_cache is None:
            reprs.append(", ...")
        elif self._keys_cache:
            cache_repr = ": <?>, ".join(repr(key) for key in self._keys_cache if key not in read_keys)
            if cache_repr:
                reprs.append(cache_repr + ": <?>")
        return ",".join(reprs)