class Primer():
    '''
    An asynchronous cache implementation. Maintains multiple recursive calls stably.

    Wraps ``func`` so that calls are routed through ``_getValue`` and a
    background process running ``_taskManager``. Construction has module-level
    side effects: the name of the wrapped function is rebound in this module's
    globals to a ``_getValue`` partial, and a worker process is started.

    Instance attributes set by ``__init__``:

    * ``_m`` -- the ``Manager`` that owns the shared objects below.
    * ``_e`` -- an ``Event`` created by the manager.
    * ``_d`` -- a ``dict`` created by the manager.
    * ``_f`` -- ``func.__code__`` serialized with ``dumps``.
    * ``_n`` -- ``func.__name__``.
    * ``_q`` -- a ``Queue`` handed to the worker process.
    * ``func`` -- the function rebuilt from ``_f`` against this module's globals.
    * ``_t`` -- the worker ``Process``.
    '''

    def __init__(self, func):
        '''
        Wrap ``func`` and start the worker process.

        :param func: a plain Python function; its ``__code__`` and ``__name__``
            are read, so arbitrary callables are not supported.
        '''
        # Mirror every attribute of the wrapped function that this object does
        # not already provide, so the wrapper can stand in for the function.
        # '__class__' is skipped so the instance keeps its own type.
        for name in set(dir(func)) - set(dir(self)):
            if name != '__class__':
                setattr(self, name, getattr(func, name))

        self._m = Manager()
        self._e = self._m.Event()
        self._d = self._m.dict()
        self._f = dumps(func.__code__)
        self._n = func.__name__
        self._q = Queue()

        # Rebuild the function from its serialized code against THIS module's
        # globals, so a recursive reference to its own name resolves to the
        # entry installed just below rather than to the undecorated function.
        self.func = FunctionType(loads(self._f), globals(), "a_func")

        # The fourth positional argument is True for the plain-call entry and
        # False for the apply_async entry; how it is interpreted is decided by
        # _getValue, which is defined elsewhere in this module.
        globals()[self._n] = partial(_getValue, self._d, self._q, self._e, True, self.func)
        globals()[self._n].apply_async = partial(_getValue, self._d, self._q, self._e, False, self.func)

        self._t = Process(target=_taskManager, args=(self._q, self._d, self._f, self._n, self._e))
        self._t.start()

    def apply_async(self, *item):
        '''
        Forward ``item`` to ``_getValue`` with the flag set to False.

        :param item: the positional arguments of the wrapped function.
        :returns: whatever ``_getValue`` returns for this path.
        '''
        return _getValue(self._d, self._q, self._e, False, self.func, *item)

    def __call__(self, *item):
        '''
        Forward ``item`` to ``_getValue`` with the flag set to True.

        :param item: the positional arguments of the wrapped function.
        :returns: whatever ``_getValue`` returns for this path.
        '''
        return _getValue(self._d, self._q, self._e, True, self.func, *item)

    def __del__(self):
        '''Terminate the worker process, if one was ever created.'''
        # __del__ also runs for objects whose __init__ raised part-way through
        # (e.g. Manager() failed). In that case ``_t`` was never assigned and
        # there is nothing to stop, so do not raise AttributeError here.
        worker = getattr(self, '_t', None)
        if worker is not None:
            worker.terminate()

    def __repr__(self):
        # NOTE(review): the text says "Cache" although this class is Primer;
        # left unchanged because callers may rely on it -- confirm intent.
        return 'concurrent.Cache(' + repr(self.func) + ')'
class Cache:
    """
    An asynchronous cache implementation. Maintains multiple recursive calls stably.

    The resultant object operates just like a function, but runs the code outside
    the main process. When calls are started with :meth:`~Cache.apply_async`, a new
    process is created to evaluate the call.

    A simple cache can reduce recursive functions such as the naive Fibonacci
    function to linear time in the input space, whereas a parallel cache can reduce
    certain problems even further, depending on the layout of the call and the
    number of processors available on a computer.

    The code below demonstrates using :class:`Cache` as a simple cache::

        >>> @Cache
        ... def fibonacci(n):
        ...     if n < 2: # Not bothering with input value checking here.
        ...         return 1
        ...     return fibonacci(n-1)+fibonacci(n-2)
        ...
        >>> fibonacci(5)
        8

    Using cache to take advantage of the ability to handle recursion branching,
    that same code would become::

        >>> @Cache
        ... def fibonacci(n):
        ...     if n < 2: # Not bothering with input value checking here.
        ...         return 1
        ...     fibonacci.apply_async(n-1)
        ...     fibonacci.apply_async(n-2)
        ...     return fibonacci(n-1)+fibonacci(n-2)
        ...
        >>> fibonacci(100)
        573147844013817084101L

    .. note::

        Be careful when picking how to call your functions if you are looking for
        speed. Given that the Fibonacci sequence is roughly linear in dependencies
        with caching, there isn't a significant speedup. When in doubt,
        :mod:`cProfile` (or :mod:`profile`) are your friends.

    .. todo::

        Eventually provide automatic profiling to help with this part.

    A good use for this would be in less sequential computation spaces, such as in
    factoring. When a pair of factors is found, each can be factored asynchronously
    to find all the prime factors recursively. When a factor in a factor pair is
    known to be prime, or otherwise has its factors known, then only one needs to
    be factored further. At this point, blindly branching and factoring will have
    one side yield the cached value, and the other create a new process.

    Given the Fibonacci example above, this will happen on every call that isn't
    the first call, leading to `n` processes being spawned and using system
    resources. Simply caching the naive Fibonacci function is just about the
    fastest way to use it.

    To avoid unnecessary branching automatically, you can use the
    :meth:`~Cache.batch_async` method similarly to the :meth:`~Cache.apply_async`
    method, except each set of arguments, even if they're singular, must be
    wrapped in a tuple. Applying this to the Fibonacci function yields::

        >>> @Cache
        ... def fibonacci(n):
        ...     if n < 2: # Not bothering with input value checking here.
        ...         return 1
        ...     fibonacci.batch_async((n-1,),(n-2,))
        ...     return fibonacci(n-1)+fibonacci(n-2)
        ...
        >>> fibonacci(200)
        453973694165307953197296969697410619233826L

    This makes the branching optimal whenever possible. Race conditions might
    cause issues, but those caused by Python's built-in Manager cannot be
    mitigated easily. For the Fibonacci sequence, this will likely just revert the
    computation to a mostly synchronous and sequential calculation, which is
    optimal for this version of calculating the Fibonacci sequence.

    .. note::

        There are `much better algorithms
        <http://en.wikipedia.org/wiki/Fibonacci_sequence#Matrix_form>`_ for
        calculating Fibonacci sequence elements; some of which are better suited
        for this type of caching.
    """

    # The text below documents membership testing, which is NOT wired up at the
    # moment: the class defines no __contains__ and the matching setattr line in
    # __init__ is commented out. It is kept for when that feature is enabled.
    #
    # Additionally, one can test whether a value has been calculated before by using
    # ``(*{item}) in {cache}``. Calls of this type will be faster if iterable objects
    # are passed to the ``in`` operator. This allows one to avoid unnecessary branching
    # and process creation. Using this, the same example becomes::
    #
    #     >>> @Cache
    #     ... def fibonacci(n):
    #     ...     if n < 2: # Not bothering with input value checking here.
    #     ...         return 1
    #     ...     if n-1 not in fibonacci or n-2 not in fibonacci:
    #     ...         fibonacci.apply_async(n-1)
    #     ...         fibonacci.apply_async(n-2)
    #     ...     return fibonacci(n-1)+fibonacci(n-2)
    #     ...
    #     >>> fibonacci(5)
    #     8

    def __init__(self, func):
        """
        Wrap ``func``, publish its cached entry points and start the worker.

        Side effects: the wrapped function's name is rebound in this module's
        globals, a worker process is started, and a shutdown hook is
        registered with :mod:`atexit`.
        """
        # Borrow every attribute the function has and this object lacks, but
        # never '__class__', so the instance keeps its own type.
        borrowed = set(dir(func)).difference(dir(self))
        borrowed.discard('__class__')
        for attr_name in borrowed:
            setattr(self, attr_name, getattr(func, attr_name))
        # '__doc__' already exists on the class, so the loop above skips it.
        self.__doc__ = func.__doc__

        self._m = Manager()
        self._e = self._m.Event()
        self._d = self._m.dict()
        self._f = dumps(func.__code__)
        self._n = func.__name__
        self._q = Queue()
        # Rebuilt against this module's globals so that recursive references
        # to the function's own name hit the entry published below.
        self.func = FunctionType(loads(self._f), globals(), "a_func")

        # Publish the plain-call entry first, then hang the asynchronous
        # variants off that same object.
        entry = partial(_getValue, self._d, self._q, self._e, True, self.func)
        globals()[self._n] = entry
        entry.apply_async = partial(_getValue, self._d, self._q, self._e, False, self.func)
        entry.batch_async = partial(_batchAsync, self._d, self._q, self.func)
        #setattr(globals()[self._n],"__contains__",self.__contains__)

        worker_args = (self._q, self._d, self._f, self._n, self._e)
        self._t = Process(target=_taskManager, args=worker_args)
        self._t.start()
        atexit.register(_closeProcessGracefully, self)  #TODO: Make this line not necessary.

    def apply_async(self, *item):
        """
        Calling this method starts up a new process of the function call in
        question. This does not retrieve an answer.
        """
        return _getValue(self._d, self._q, self._e, False, self.func, *item)

    def batch_async(self, *items):
        """
        This method examines the arguments passed in for how to branch
        optimally then does so. This does not retrieve the answers, just like
        apply_async does not.

        The arguments must each be a complete set of the arguments passed into
        the function but in tuple form. If the cached function only takes one
        argument, wrap it with parentheses and add a comma before the closing
        parenthesis.
        """
        _batchAsync(self._d, self._q, self.func, *items)

    def __call__(self, *item):
        """Forward the call to ``_getValue`` with the flag set to True."""
        return _getValue(self._d, self._q, self._e, True, self.func, *item)

    def __del__(self):
        """Hand this object to ``_closeProcessGracefully`` on collection."""
        _closeProcessGracefully(self)

    def __repr__(self):
        return 'concurrent.Cache(' + repr(self.func) + ')'