def callback(self, case, policy, name, result):
    '''
    Method responsible for storing results.

    The implementation in this class does not store anything. It only
    increments the counter ``self.i`` and logs the progress: every
    completed case is logged at debug level, every 100th completed case
    is logged at info level as well.

    :param case: the case to be stored
    :param policy: the name of the policy being used
    :param name: the name of the model being used
    :param result: the result dict

    '''
    self.i += 1

    progress = str(self.i)+" cases completed"
    debug(progress)

    # only every 100th case is reported at info level
    if self.i % 100 != 0:
        return
    info(progress)
def save_results(results, file):
    '''
    Save the results to the specified cPickle file.

    The results are pickled with protocol 2, which is a binary format, to
    facilitate transfer across different machines, see also:
    http://projects.scipy.org/numpy/ticket/1284

    :param results: the return of run_experiments
    :param file: the path of the file
    :raises: IOError if the file cannot be opened or written; a warning
             is logged before the exception is re-raised

    '''
    path = os.path.abspath(file)
    debug("saving results to: " + path)

    try:
        # the context manager closes (and thereby flushes) the handle, also
        # when pickling fails halfway
        with open(file, 'wb') as fh:
            cPickle.dump(results, fh, protocol=2)
    except IOError:
        warning(path + " not found")
        raise
def merge_results(results1, results2, downsample=None):
    '''
    Convenience function for merging the return from
    :meth:`~model.SimpleModelEnsemble.perform_experiments`.

    The function merges results2 with results1. For the experiments, it
    generates an empty array equal to the size of the sum of the
    experiments. As dtype it uses the dtype from the experiments in
    results1. The function assumes that the ordering of dtypes and names
    is identical in both results.

    Only the outcomes that are present in both results are merged. Each
    outcome is indexed as a 2-d array (first axis: experiments); the merged
    outcome has the rows of results1 followed by the rows of results2 and
    is allocated with the default dtype of :func:`numpy.empty`.

    A typical use case for this function is in combination with
    :func:`~util.experiments_to_cases`. Using
    :func:`~util.experiments_to_cases` one extracts the cases from a first
    set of experiments. One then performs these cases on a different model
    or policy, and then one wants to merge these new results with the old
    result for further analysis.

    :param results1: first results to be merged, a tuple of the
                     experiments array and the outcomes dict
    :param results2: second results to be merged, same structure
    :param downsample: should be an integer, will be used in slicing the
                       second axis of the results in order to avoid memory
                       problems.
    :return: the merged results, a tuple of the merged experiments array
             and the dict with the merged outcomes

    '''
    old_exp, old_res = results1
    new_exp, new_res = results2

    # merge experiments
    nr_old = old_exp.shape[0]
    merged_exp = np.empty((nr_old + new_exp.shape[0],), dtype=old_exp.dtype)
    merged_exp[:nr_old] = old_exp
    merged_exp[nr_old:] = new_exp

    # only merge the results that are in both; a key that is missing in one
    # of the two cannot be merged
    keys = set(old_res.keys()) & set(new_res.keys())
    info("intersection of keys: %s" % keys)

    # a step of 1 keeps every column
    step = downsample if downsample else 1

    # merging results
    merged_res = {}
    for key in keys:
        info("merge "+key)

        old_value = old_res[key]
        new_value = new_res[key]

        nr_rows = old_value.shape[0] + new_value.shape[0]
        # integer ceiling division: the number of columns that [::step]
        # selects, independent of Python 2 / 3 division semantics
        nr_cols = -(-old_value.shape[1] // step)

        merged_value = np.empty((nr_rows, nr_cols))
        # the shape is itself a tuple, so it has to be wrapped to be
        # formatted as a single value
        debug("merged shape: %s" % (merged_value.shape,))

        merged_value[:old_value.shape[0], :] = old_value[:, ::step]
        merged_value[old_value.shape[0]:, :] = new_value[:, ::step]

        merged_res[key] = merged_value

    return merged_exp, merged_res
def run(self):
    '''
    Set up the logger of this process, log at debug level that the process
    has started, and hand over to the ``run`` of the super class.

    '''
    self._setupLogger()

    process = multiprocessing.current_process()
    started = 'process %s with pid %s started' % (process.name, process.pid)
    debug(started)

    # the run of the super in turn will call the worker function
    super(LoggingProcess, self).run()
def worker(inqueue, outqueue, modelInterfaces, modelInitKwargs=None):
    '''
    Code run by worker processes.

    The worker keeps getting tasks from the inqueue until it gets the
    sentinel ``None`` or until getting a task raises an EOFError or
    IOError. A task is a tuple ``(job, i, case, policy)``. The case is run
    on every model interface, and for every model interface
    ``(job, i, result)`` is put on the outqueue. The result is
    ``(True, (case, policy, name, output))`` if the run succeeded,
    ``(False, exception)`` if it raised, and
    ``(False, EMAParallelError)`` if the model interface is not
    initialized for the current policy.

    A model interface is (re)initialized whenever the policy of a task
    differs from the policy of the previous task. Whether initialization
    succeeded is tracked per model interface, so a failing init of one
    model interface is not masked by a successful init of another.

    :param inqueue: the queue from which the tasks are taken
    :param outqueue: the queue on which the results are put
    :param modelInterfaces: the model interfaces on which each case is run
    :param modelInitKwargs: passed as second argument to the ``model_init``
                            of every model interface
    :raises: EMAError if ``model_init`` raises it, after logging it

    '''
    debug("worker started")

    put = outqueue.put
    get = inqueue.get
    if hasattr(inqueue, '_writer'):
        # the worker only gets from the inqueue and only puts on the
        # outqueue, so the other ends are closed
        inqueue._writer.close()
        outqueue._reader.close()

    oldPolicy = {}
    # maps the position of a model interface to whether its last model_init
    # succeeded; a missing entry means it has never been initialized
    initialized = {}

    while True:
        try:
            task = get()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break

        if task is None:
            debug('worker got sentinel -- exiting')
            for msi in modelInterfaces:
                msi.cleanup()
            break

        job, i, case, policy = task
        policyChanged = policy != oldPolicy

        for nr, modelInterface in enumerate(modelInterfaces):
            if policyChanged:
                initialized[nr] = False
                try:
                    debug("invoking model init")
                    modelInterface.model_init(policy, modelInitKwargs)
                    debug("model initialized successfully")
                    initialized[nr] = True
                except EMAError:
                    exception("init not implemented")
                    raise
                except Exception:
                    # logged only; the failure is reported to the caller
                    # through the result of every case for this policy
                    exception("some exception occurred when invoking the init")

            if initialized.get(nr, False):
                try:
                    try:
                        debug("trying to run model")
                        # the model gets its own copy, the case itself is
                        # returned as part of the result
                        modelInterface.run_model(copy.deepcopy(case))
                    except CaseError as e:
                        EMAlogging.warning(e)
                    debug("trying to retrieve output")
                    result = modelInterface.retrieve_output()

                    debug("trying to reset model")
                    modelInterface.reset_model()
                    result = (True, (case, policy, modelInterface.name, result))
                except Exception as e:
                    result = (False, e)
            else:
                result = (False, EMAParallelError("failure to initialize"))

            put((job, i, result))

        # only update after all model interfaces have handled the task, so
        # every one of them is initialized for a new policy
        oldPolicy = policy
def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool, task_handler,
                    result_handler, cache, workingDirectories):
    '''
    Shut down the pool and remove the working directories of the workers.

    The task handler and result handler are set to the terminate state and
    are sent a sentinel, the workers are terminated, handlers and workers
    are joined, and finally the parent directory of every working directory
    is deleted.

    :param taskqueue: the queue read by the task handler
    :param inqueue: the queue from which the workers get their tasks
    :param outqueue: the queue on which the workers put their results
    :param pool: the list of worker processes
    :param task_handler: the task handler thread
    :param result_handler: the result handler thread
    :param cache: the cache with the jobs that are not yet completed
    :param workingDirectories: the working directories that were created
                               for the workers
    :raises: OSError if a directory cannot be deleted

    '''
    EMAlogging.info("terminating pool")

    # this is guaranteed to only be called once
    debug('finalizing pool')
    TERMINATE = 2

    task_handler._state = TERMINATE
    for p in pool:
        taskqueue.put(None)                 # sentinel
        time.sleep(1)

    debug('helping task handler/workers to finish')
    cls._help_stuff_finish(inqueue, task_handler, len(pool))

    assert result_handler.is_alive() or len(cache) == 0

    result_handler._state = TERMINATE
    outqueue.put(None)                  # sentinel

    can_terminate = pool and hasattr(pool[0], 'terminate')
    if can_terminate:
        debug('terminating workers')
        for p in pool:
            p.terminate()

    debug('joining task handler')
    task_handler.join(1e100)

    debug('joining result handler')
    result_handler.join(1e100)

    if can_terminate:
        debug('joining pool workers')
        for p in pool:
            p.join()

    # cleaning up directories
    # TODO investigate whether the multiprocessing.util tempdirectory
    # functionality can be used instead

    # several working directories can share the same parent directory; each
    # parent is deleted only once, because deleting a directory that is
    # already gone raises an OSError
    parents = []
    for directory in workingDirectories:
        parent = os.path.dirname(directory)
        if parent not in parents:
            parents.append(parent)

    for directory in parents:
        EMAlogging.debug("deleting "+str(directory))
        shutil.rmtree(directory)
def _handle_tasks(taskqueue, put, outqueue, pool, logQueue):
    '''
    Code run by the task handler thread.

    Sequences of tasks are taken from the taskqueue until the sentinel
    ``None`` is found, and every task is handed to ``put``. Handling stops
    early when the ``_state`` of the thread is set or when a task cannot be
    put. Afterwards a sentinel is sent to the result handler, to the
    workers and, after a pause of two seconds, to the log queue.

    :param taskqueue: queue with ``(sequence of tasks, set_length)`` tuples
    :param put: callable that hands a task to the workers
    :param outqueue: the queue read by the result handler
    :param pool: the list of worker processes
    :param logQueue: the queue on which ``None`` is put when the task
                     handler exits

    '''
    handler = threading.current_thread()

    for sequence, set_length in iter(taskqueue.get, None):
        # stays -1 for an empty sequence, so set_length gets 0
        last = -1
        for last, item in enumerate(sequence):
            if handler._state:
                debug('task handler found thread._state != RUN')
                break
            try:
                put(item)
            except IOError:
                debug('could not put task on queue')
                break
        else:
            # the complete sequence has been put, on to the next one
            if set_length:
                debug('doing set_length()')
                set_length(last+1)
            continue
        # the sequence was left early, so stop handling tasks altogether
        break
    else:
        debug('task handler got sentinel')

    try:
        # tell result handler to finish when cache is empty
        debug('task handler sending sentinel to result handler')
        outqueue.put(None)

        # tell workers there is no more work
        debug('task handler sending sentinel to workers')
        for _ in range(2*len(pool)):
            put(None)
    except IOError:
        debug('task handler got IOError when sending sentinels')

    debug('task handler exiting')
    time.sleep(2)
    logQueue.put(None)
def __init__(self,
             modelStructureInterfaces,
             processes=None,
             callback=None,
             kwargs=None):
    '''
    Set up the queues, the reader of the log queue, the worker processes
    with their own copy of the working directories, and the task handler
    and result handler threads.

    :param modelStructureInterfaces: the model structure interfaces that
                                     are handed to every worker
    :param processes: nr. of processes to spawn, if None, it is set to
                      equal the nr. of cores
    :param callback: callback function for handling the output
    :param kwargs: kwargs to be passed to :meth:`model_init`

    '''
    self._setup_queues()
    self._taskqueue = Queue.Queue()
    self._cache = {}
    self._state = RUN
    self._callback = callback

    if processes is None:
        try:
            processes = cpu_count()
        except NotImplementedError:
            processes = 1
    info("nr of processes is "+str(processes))

    self.Process = LoggingProcess
    self.logQueue = multiprocessing.Queue()
    h = EMAlogging.NullHandler()
    logging.getLogger(EMAlogging.LOGGER_NAME).addHandler(h)

    # This thread will read from the subprocesses and write to the
    # main log's handlers.
    log_queue_reader = LogQueueReader(self.logQueue)
    log_queue_reader.start()

    def ignore_function(path, names):
        # ignore callable for copytree: everything inside a path that
        # contains '.svn' is skipped, everything else is copied
        if '.svn' in path:
            return names
        return []

    self._pool = []
    workingDirectories = []

    debug('generating workers')

    workerRoot = None
    for i in range(processes):
        debug('generating worker '+str(i))

        workerName = 'PoolWorker'+str(i)

        # setup working directories for parallelEMA
        for msi in modelStructureInterfaces:
            if msi.workingDirectory is None:
                continue

            if workerRoot is None:
                # the root is derived from the first model structure
                # interface that actually has a working directory
                workerRoot = os.path.dirname(
                                os.path.abspath(msi.workingDirectory))

            workingDirectory = os.path.join(workerRoot,
                                            workerName,
                                            msi.name)
            workingDirectories.append(workingDirectory)
            shutil.copytree(msi.workingDirectory,
                            workingDirectory,
                            ignore=ignore_function)
            msi.set_working_directory(workingDirectory)

        level = logging.getLogger(EMAlogging.LOGGER_NAME)\
                       .getEffectiveLevel()
        w = self.Process(
            self.logQueue,
            level=level,
            target=worker,
            args=(self._inqueue,
                  self._outqueue,
                  modelStructureInterfaces,
                  kwargs)
            )
        self._pool.append(w)

        w.name = w.name.replace('Process', workerName)
        w.daemon = True
        w.start()
        debug(' worker '+str(i) + ' generated')

    self._task_handler = threading.Thread(
        target=CalculatorPool._handle_tasks,
        args=(self._taskqueue,
              self._quick_put,
              self._outqueue,
              self._pool,
              self.logQueue)
        )
    self._task_handler.daemon = True
    self._task_handler._state = RUN
    self._task_handler.start()

    self._result_handler = threading.Thread(
        target=CalculatorPool._handle_results,
        args=(self._outqueue, self._quick_get, self._cache)
        )
    self._result_handler.daemon = True
    self._result_handler._state = RUN
    self._result_handler.start()

    # registers _terminate_pool as the finalizer of this pool; it also gets
    # the working directories that were created above
    self._terminate = Finalize(self,
                               self._terminate_pool,
                               args=(self._taskqueue,
                                     self._inqueue,
                                     self._outqueue,
                                     self._pool,
                                     self._task_handler,
                                     self._result_handler,
                                     self._cache,
                                     workingDirectories,
                                     ),
                               exitpriority=15
                               )

    EMAlogging.info("pool has been set up")