def getsharedsequencefactory(startvalue, intervallen=5000):
    """Create a factory for parallel readers of a shared integer sequence.

    Returns a callable f. Each call f() returns a new callable g. Each call
    g(*args) returns an int from the sequence that is unique across all g's
    created from the same f. When several g's are in use, the ints are not
    necessarily handed out in increasing order across the g's, but they are
    never repeated. The arguments to g are accepted but ignored. Thus g can
    be used as idfinder for [Decoupled]Dimensions.

    The different g's can be used safely from different processes and
    threads.

    Arguments:
    - startvalue: The first value to return. If None, 0 is assumed.
    - intervallen: The amount of numbers that a single g from above
      can return before synchronization is needed to get a new amount.
      Must be at least 1. Default: 5000.

    Raises:
    - ValueError: if intervallen is less than 1.
    """
    if startvalue is None:
        startvalue = 0
    if intervallen < 1:
        # With an empty interval, range(*interval) never yields anything, so
        # a g would keep fetching intervals forever without ever returning.
        raise ValueError("intervallen must be at least 1")

    # We use a Queue to ensure that intervals are only given to one deliverer.
    # The queue is bounded, so the generating process blocks in put() once
    # 10 intervals are waiting to be picked up.
    values = multiprocessing.Queue(10)

    # A worker that fills the queue with (first, last + 1) pairs
    def valuegenerator(nextval):
        sys.excepthook = _getexcepthook()
        while True:
            values.put((nextval, nextval + intervallen))
            nextval += intervallen

    p = multiprocessing.Process(target=valuegenerator, args=(startvalue, ))
    p.daemon = True
    p.start()

    # A generator that repeatedly gets an interval from the queue and returns
    # all numbers in that interval before it gets a new interval and goes on
    def valuedeliverer():
        while True:
            interval = values.get()
            for i in range(*interval):
                yield i

    # A factory method for the object the end-consumer calls
    def factory():
        generator = valuedeliverer()  # get a unique generator

        # The method called (i.e., the g) by the end-consumer
        def getnextseqval(*ignored):
            return next(generator)
        return getnextseqval

    return factory
def _getexitfunction():
    """Return a function that halts the execution of pygrametl.

    pygrametl uses the function as excepthook in spawned processes such that
    an uncaught exception halts the entire execution.

    Three strategies are tried in order:
    - On Java, the returned function calls java.lang.System.exit(1).
    - If the os module offers getpgrp and killpg, the returned function
      sends SIGTERM to the caller's process group.
    - Otherwise a helper "terminator" process is started (once) which
      collects the pids reported by each call to this function. The
      returned function tells the terminator to kill all pids it knows.
    """
    global _toterminator

    # On Java, System.exit will do as there are no separate processes
    if sys.platform.startswith('java'):
        def javaexitfunction():
            import java.lang.System
            java.lang.System.exit(1)
        return javaexitfunction

    # else see if the os module provides functions to kill process groups;
    # this should be the case on UNIX.
    import signal
    if hasattr(os, 'getpgrp') and hasattr(os, 'killpg'):
        def unixexitfunction():
            procgrp = os.getpgrp()
            os.killpg(procgrp, signal.SIGTERM)
        return unixexitfunction

    # else, we are on a platform that does not allow us to kill a group.
    # We make a special process that gets the pids of all calls to
    # this procedure. The function we return, informs this process to kill
    # all processes it knows about.

    # set up the terminator (only the first time we get here)
    if _toterminator is None:
        _toterminator = multiprocessing.Queue()

        def terminatorfunction():
            pids = set([_masterpid])
            while True:
                item = _toterminator.get()
                if isinstance(item, int):
                    pids.add(item)
                    continue
                # We take anything else as a signal to kill all
                for p in pids:
                    try:
                        # we don't know which signals exist; use 9
                        os.kill(p, 9)
                    except OSError:
                        # The process is already gone (or cannot be
                        # signalled). Keep going so that the remaining
                        # processes are still killed.
                        pass
                return

        terminatorprocess = multiprocessing.Process(target=terminatorfunction)
        terminatorprocess.daemon = True
        terminatorprocess.start()

    # tell the terminator about this process
    _toterminator.put(os.getpid())

    # return a function that tells the terminator to kill all known processes
    def exitfunction():
        _toterminator.put('TERMINATE')

    return exitfunction
def shareconnectionwrapper(targetconnection, maxclients=10, userfuncs=()):
    """Share a ConnectionWrapper between several processes/threads.

    When Decoupled objects are used, they can try to update the DW at the
    same time. They can use several ConnectionWrappers to avoid race
    conditions, but this is not transactionally safe. Instead, they can use
    a "shared" ConnectionWrapper obtained through this function.

    When a ConnectionWrapper is shared, it is executing in a separate process
    (or thread, in case Jython is used) and ensuring that only one operation
    takes place at the time. This is hidden from the users of the shared
    ConnectionWrapper. They see an interface similar to the normal
    ConnectionWrapper.

    When this method is called, it returns a SharedConnectionWrapperClient
    which can be used as a normal ConnectionWrapper. Each process
    (i.e., each Decoupled object) should, however, get a unique
    SharedConnectionWrapperClient by calling copy() on the returned
    SharedConnectionWrapperClient.

    Note that a shared ConnectionWrapper needs to hold the complete result of
    each query in memory until it is fetched by the process that executed the
    query. Again, this is hidden from the users.

    It is also possible to add methods to a shared ConnectionWrapper when it
    is created. When this is done and the method is invoked, no other
    operation will modify the DW at the same time. If, for example,
    the functions foo and bar are added to a shared ConnectionWrapper (by
    passing the argument userfuncs=(foo, bar) to shareconnectionwrapper),
    the returned SharedConnectionWrapperClient will offer the methods
    foo and bar which when called will be running in the separate process
    for the shared ConnectionWrapper. This is particularly useful for
    user-defined bulk loaders as used by BulkFactTable:

    def bulkload():
        # DBMS-specific code here.
        # No other DW operation should take place concurrently

    scw = shareconnectionwrapper(ConnectionWrapper(...), userfuncs=(bulkload,))
    facttbl = BulkFact(..., bulkloader=scw.copy().bulkload) #Note the .copy().

    Arguments:
    - targetconnection: a pygrametl ConnectionWrapper
    - maxclients: the maximum number of concurrent clients. Default: 10
    - userfuncs: a sequence of functions to add to the shared
      ConnectionWrapper. Each must be callable and have a name other than
      '<lambda>' that is not already an attribute of
      SharedConnectionWrapperClient. Default: ()

    Raises:
    - ValueError: if an element of userfuncs is not a named callable or if
      its name clashes with an attribute of SharedConnectionWrapperClient.
    """
    # Validate the user functions first so that invalid input is rejected
    # before any queue or server object has been created.
    namedfuncs = []
    for func in userfuncs:
        # __name__ is available on functions in both Python 2 and Python 3
        # (func_name only exists in Python 2).
        name = getattr(func, '__name__', None)
        if not callable(func) or name is None or name == '<lambda>':
            raise ValueError("Elements in userfunc must be callable and named")
        if hasattr(SharedConnectionWrapperClient, name):
            raise ValueError("Illegal function name: " + name)
        namedfuncs.append((name, func))

    # One bounded request queue to the server and one reply queue per client
    toserver = multiprocessing.JoinableQueue(5000)
    toclients = [multiprocessing.Queue() for i in range(maxclients)]
    # freelines holds the indexes of the reply queues, one per client slot
    freelines = multiprocessing.Queue()
    for i in range(maxclients):
        freelines.put(i)

    serverCW = SharedConnectionWrapperServer(targetconnection, toserver,
                                             toclients)
    # Attach the user functions to the server under a prefixed name and
    # remember the plain names for the client
    userfuncnames = []
    for (name, func) in namedfuncs:
        setattr(serverCW, '_userfunc_' + name, func)
        userfuncnames.append(name)

    serverprocess = multiprocessing.Process(target=serverCW.worker)
    serverprocess.name = 'Process for shared connection wrapper'
    serverprocess.daemon = True
    serverprocess.start()

    module = targetconnection.getunderlyingmodule()
    clientCW = SharedConnectionWrapperClient(toserver, toclients, freelines,
                                             module, userfuncnames)
    return clientCW