def run(self):
    """
    Drain the queue and pass every record to the handlers of its logger.

    Each record is dispatched through the logger registered under the
    record's own name, i.e. the name of the logger that originally created
    it. The logging documentation says logging is thread safe, so there
    shouldn't be contention between normal logging (from the main process)
    and this thread.

    The loop stops when ``None`` is received or when reading the queue
    raises ``EOFError``. ``KeyboardInterrupt`` and ``SystemExit`` propagate;
    any other error is printed to ``sys.stderr`` and the loop goes on.
    """
    while True:
        try:
            item = self.queue.get()
            if item is None:
                debug("none received")
                break

            # dispatch under the name of the originating logger
            target = logging.getLogger(item.name)
            target.callHandlers(item)
        except EOFError:
            break
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            traceback.print_exc(file=sys.stderr)
def load_optimization_results(file_name, weights, zipped=True):
    '''
    Load pickled optimization results from the specified file.

    The file is assumed to have been saved using save_results. Before
    unpickling, ``Fitness`` and ``Individual`` are registered on ``creator``
    with the given weights (presumably so the pickled individuals can be
    resolved while loading -- confirm against the code that saves them).

    :param file_name: the path of the file
    :param weights: the weights the stored results are expected to have
    :param zipped: load the pickled data from a bz2 file if True
    :return: the unpickled results
    :raises: IOError if the file cannot be opened or read
    :raises: EMAError if weights are not correct

    '''
    creator.create("Fitness", base.Fitness, weights=weights)
    creator.create("Individual", dict,
                   fitness=creator.Fitness) #@UndefinedVariable

    file_name = os.path.abspath(file_name)
    debug("loading "+file_name)
    try:
        # keep the path and the handle in separate names, the path is still
        # needed for the warning below
        if zipped:
            file_handle = bz2.BZ2File(file_name, 'rb')
        else:
            file_handle = open(file_name, 'rb')

        try:
            # NOTE(review): unpickling can execute arbitrary code; only load
            # files from a trusted source.
            results = cPickle.load(file_handle)
        finally:
            file_handle.close()
    except IOError:
        warning(file_name + " not found")
        raise

    if results[0].weights != weights:
        raise EMAError("weights are %s, should be %s" %
                       (weights, results[0].weights))
    return results
def pickled_save_results(results, file_name, zipped=True):
    '''
    Save the results to the specified file using pickle.

    To facilitate transfer across different machines, the files are saved
    in binary format (pickle protocol 2).

    see also: http://projects.scipy.org/numpy/ticket/1284

    :param results: the return of run_experiments
    :param file_name: the path of the file
    :param zipped: save the pickled data to a bz2 file if True
    :raises: IOError if the file cannot be opened or written

    '''
    file_name = os.path.abspath(file_name)
    debug("saving results to: " + file_name)
    try:
        # keep the path and the handle in separate names, the path is still
        # needed for the warning below
        if zipped:
            file_handle = bz2.BZ2File(file_name, 'wb')
        else:
            file_handle = open(file_name, 'wb')

        try:
            cPickle.dump(results, file_handle, protocol=2)
        finally:
            # closing is what guarantees the (compressed) data is flushed
            file_handle.close()
    except IOError:
        warning(file_name + " not found")
        raise
def _store_result(self, result):
    '''
    Store the outcomes of the current experiment in ``self.results``.

    For every entry of ``self.outcomes`` the value is looked up in `result`
    and written to row ``self.i-1`` of the array kept for that outcome.
    The first time an outcome is seen, a memory-mapped array backed by a
    temporary file is created with room for ``self.nr_experiments`` rows
    and pre-filled with NaN.

    :param result: mapping from outcome to the value produced by the run;
                   outcomes missing from it are skipped
    :raises: EMAError if the data of an outcome has more than two
             dimensions

    '''
    for outcome in self.outcomes:
        debug("storing {}".format(outcome))

        try:
            outcome_res = result[outcome]
        except KeyError:
            ema_logging.debug("%s not in msi" % outcome)
            continue

        try:
            self.results[outcome][self.i-1, ] = outcome_res
            self.results[outcome].flush()
        except KeyError:
            # first result for this outcome: allocate its storage
            data = np.asarray(outcome_res)

            shape = data.shape
            if len(shape)>2:
                raise EMAError(self.shape_error_msg.format(len(shape)))

            shape = (self.nr_experiments,) + tuple(shape)

            fh = tempfile.TemporaryFile()
            self.results[outcome] = np.memmap(fh,
                                              dtype=data.dtype,
                                              shape=shape)
            # np.nan instead of the np.NAN alias, which NumPy 2.0 removed
            self.results[outcome][:] = np.nan
            self.results[outcome][self.i-1, ] = data
            self.results[outcome].flush()
def model_init(self, policy, kwargs):
    """
    Init of the model. The provided implementation here assumes
    that `self.modelFile` is set correctly. In case of using different
    vensim models for different policies, it is recommended to extend
    this method, extract the model file from the policy dict, set
    `self.modelFile` to this file and then call this implementation through
    calling `super`.

    After loading the model, the run length is derived from the model's
    INITIAL TIME, FINAL TIME and TIME STEP (or SAVEPER when it is larger
    than zero) and stored in `self.runLength`.

    :param policy: a dict specifying the policy. In this
                   implementation, this argument is ignored.
    :param kwargs: additional keyword arguments. In this implementation
                   this argument is ignored.
    :raises: EMAWarning if one of the time settings cannot be retrieved
    """
    load_model(self.workingDirectory+self.modelFile) #load the model
    debug("model initialized successfully")

    be_quiet() # minimize the screens that are shown

    try:
        initialTime = get_val('INITIAL TIME')
        finalTime = get_val('FINAL TIME')
        timeStep = get_val('TIME STEP')
        savePer = get_val('SAVEPER')

        if savePer > 0:
            timeStep = savePer

        self.runLength = int((finalTime - initialTime)/timeStep +1)
    except VensimWarning as w:
        # pass on the message of the caught warning, not the repr of the
        # exception class
        raise EMAWarning(str(w))
def pickled_load_results(file_name, zipped=True):
    '''
    Load pickled results from the specified file. The file is assumed to
    have been saved using save_results.

    :param file_name: the path of the file
    :param zipped: load the pickled data from a bz2 file if True
    :return: the unpickled results
    :raises: IOError if the file cannot be opened or read

    '''
    file_name = os.path.abspath(file_name)
    debug("loading "+file_name)
    try:
        if zipped:
            file_handle = bz2.BZ2File(file_name, 'rb')
        else:
            file_handle = open(file_name, 'rb')

        try:
            # NOTE(review): unpickling can execute arbitrary code; only load
            # files from a trusted source.
            results = cPickle.load(file_handle)
        finally:
            file_handle.close()
    except IOError:
        warning(file_name + " not found")
        raise

    return results
def read_cin_file(file):
    '''
    Have Vensim read a .cin file.

    :param file: location of the .cin file.
    :exception: raises a :class:`~EMAExceptions.VensimWarning` if the cin
                file cannot be read.

    '''
    cin_command = r"SIMULATE>READCIN|"+file
    debug("executing COMMAND: "+cin_command)
    try:
        command(cin_command)
    except VensimWarning as w:
        # log the reason before handing the warning back to the caller
        debug(str(w))
        raise w
def set_working_directory(self, wd):
    '''
    Point the model interface at a new working directory.

    This method is used in case of running models in parallel. In this
    case, each worker process will have its own working directory, to
    avoid having to share files across processes, so the working directory
    has to be updated to the worker's own directory.

    :param wd: The new working directory. It is stored as an absolute path.

    '''
    absolute_wd = os.path.abspath(wd)
    debug('setting working directory to '+ absolute_wd)
    self.working_directory = absolute_wd
def load_model(file):
    '''
    Load the model.

    :param file: the location of the .vpm file to be loaded.
    :exception: raises a :class:`~EMAExceptions.VensimError` if the model
                cannot be loaded.

    .. note: only works for .vpm files

    '''
    # build the command once so the logged command is exactly what is sent
    load_command = r"SPECIAL>LOADMODEL|"+file
    debug("executing COMMAND: "+load_command)
    try:
        command(load_command)
    except VensimWarning as w:
        warning(str(w))
        raise VensimError("vensim file not found")
def __call__(self, case, policy, name, result):
    '''
    Register that another case has been completed.

    The implementation in this class does not store anything; it only
    counts the completed runs and logs the count: at debug level for every
    call, and at info level once every ``self.reporting_interval`` calls.

    :param case: the case to be stored
    :param policy: the name of the policy being used
    :param name: the name of the model being used
    :param result: the result dict

    '''
    self.i += 1
    progress = str(self.i)+" cases completed"

    debug(progress)
    if self.i % self.reporting_interval == 0:
        info(progress)
def run_simulation(file):
    '''
    Convenience function to run a model and store the results of the run
    in the specified .vdf file. The specified output file will be
    overwritten by default.

    :param file: the location of the outputfile
    :exception: raises a :class:`~EMAExceptions.VensimError` if running
                the model failed in some way.

    '''
    try:
        set_run_name = "SIMULATE>RUNNAME|"+file+"|O"
        debug(" executing COMMAND: "+set_run_name)
        command(set_run_name)

        start_run = r"MENU>RUN|o"
        debug(start_run)
        command(start_run)
    except VensimWarning as w:
        message = str(w)
        warning(message)
        raise VensimError(message)
def cleanup(self):
    '''
    Cleaning up prior to finishing performing experiments. This will
    close the workbook (without saving) and quit Excel.

    Excel is quit and both references are reset even when closing the
    workbook fails, so a failure does not leave an Excel instance behind.
    Any error raised while closing or quitting still propagates.

    '''
    ema_logging.debug("cleaning up")
    try:
        if self.wb:
            self.wb.Close(False)
    finally:
        # drop the workbook reference before quitting the application
        self.wb = None
        try:
            if self.xl:
                self.xl.DisplayAlerts = False
                self.xl.Quit()
        finally:
            self.xl = None
def __init__(self, workingDirectory, name):
    """interface to the model

    :param workingDirectory: workingDirectory for the model.
    :param name: name of the modelInterface. The name should contain only
                 alphanumerical characters.

    .. note:: Anything that is relative to `self.workingDirectory`
              should be specified in `model_init` and not
              in `__init__`. Otherwise, the code will not work when running
              it in parallel. The reason for this is that the working
              directory is being updated by parallelEMA to the worker's
              separate working directory prior to calling `model_init`.

    """
    super(VensimModelStructureInterface, self).__init__(workingDirectory,
                                                        name)

    # Copy before appending: when `outcomes` is defined on the class, an
    # append on the shared list would add another TIME outcome to the class
    # for every instance that is created.
    self.outcomes = list(self.outcomes)
    self.outcomes.append(Outcome('TIME' , time=True))

    debug("vensim interface init completed")
def model_init(self, policy, kwargs):
    '''
    Start Excel and open the workbook, unless Excel is already running.

    :param policy: policy to be run, in the default implementation, this
                   argument is ignored. Extend :meth:`model_init` to
                   specify how this argument should be used.
    :param kwargs: keyword arguments to be used by :meth:`model_init`
    :raises: EMAError if starting Excel or opening the workbook fails with
             a COM error

    '''
    if not self.xl:
        try:
            ema_logging.debug("trying to start Excel")
            self.xl = win32com.client.Dispatch("Excel.Application")
            ema_logging.debug("Excel started")

            ema_logging.debug("trying to open workbook")
            workbook_path = self.workingDirectory + self.workbook
            self.wb = self.xl.Workbooks.Open(workbook_path)
            ema_logging.debug("workbook opened")
        except com_error as error:
            raise EMAError(str(error))

    ema_logging.debug(self.workingDirectory)
def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool, task_handler, result_handler, cache, working_dirs):
    """
    Shut down the pool and remove the working directories of the workers.

    The handler threads are switched to the TERMINATE state, sentinels are
    put on the queues, workers that offer ``terminate`` are terminated, and
    threads and workers are joined. Finally the parent directory of every
    entry in `working_dirs` is deleted.

    :param taskqueue: queue read by the task handler
    :param inqueue: queue the workers read from
    :param outqueue: queue the result handler reads from
    :param pool: list of worker processes
    :param task_handler: the task handler thread
    :param result_handler: the result handler thread
    :param cache: the job cache of the pool
    :param working_dirs: working directories created for the workers
    """
    info("terminating pool")

    # this is guaranteed to only be called once
    debug("finalizing pool")
    TERMINATE = 2

    task_handler._state = TERMINATE
    for p in pool:
        taskqueue.put(None)  # sentinel
        time.sleep(1)

    debug("helping task handler/workers to finish")
    cls._help_stuff_finish(inqueue, task_handler, len(pool))

    assert result_handler.is_alive() or len(cache) == 0

    result_handler._state = TERMINATE
    outqueue.put(None)  # sentinel

    if pool and hasattr(pool[0], "terminate"):
        debug("terminating workers")
        for p in pool:
            p.terminate()

    debug("joining task handler")
    task_handler.join(1e100)

    debug("joining result handler")
    result_handler.join(1e100)

    if pool and hasattr(pool[0], "terminate"):
        debug("joining pool workers")
        for p in pool:
            p.join()

    # cleaning up directories
    # TODO investigate whether the multiprocessing.util tempdirectory
    # functionality can be used instead
    # Several working directories can share one parent (one per model
    # interface of the same worker); remove every parent only once, a second
    # rmtree on an already deleted directory would raise.
    removed = set()
    for directory in working_dirs:
        directory = os.path.dirname(directory)
        if directory in removed:
            continue
        removed.add(directory)
        debug("deleting " + str(directory))
        shutil.rmtree(directory)
def worker(inqueue, outqueue, model_interfaces, model_kwargs=None):
    """
    Code run by worker processes.

    Takes ``(job, i, experiment)`` tasks from `inqueue`, runs the experiment
    on the model structure interface named in the experiment and puts
    ``(job, i, (success, payload))`` on `outqueue`. A ``None`` task, or an
    ``EOFError``/``IOError`` while reading the queue, ends the loop.

    A model is only (re)initialized when the (policy name, model name) pair
    of the task differs from the pair that is currently initialized. When
    the initialization fails, the failure is reported for that task and the
    task is skipped.

    :param inqueue: queue supplying the tasks
    :param outqueue: queue receiving the results
    :param model_interfaces: iterable of model structure interfaces; they
                             are looked up by their ``name``
    :param model_kwargs: keyword arguments passed on to ``model_init``
    """
    debug("worker started")

    put = outqueue.put
    get = inqueue.get
    if hasattr(inqueue, "_writer"):
        inqueue._writer.close()
        outqueue._reader.close()

    def cleanup(model_interfaces):
        for msi in model_interfaces:
            msi.cleanup()
            del msi

    msis = {msi.name: msi for msi in model_interfaces}

    # Has to live outside the task loop: when it is reset for every task the
    # check below is always true and the model is initialized over and over.
    msi_initialization_dict = {}

    while 1:
        try:
            task = get()
        except (EOFError, IOError):
            debug("worker got EOFError or IOError -- exiting")
            break
        if task is None:
            debug("worker got sentinel -- exiting")
            cleanup(model_interfaces)
            break

        job, i, experiment = task

        policy = experiment.pop("policy")
        msi = experiment.pop("model")

        # check whether we already initialized the model for this
        # policy
        if (policy["name"], msi) not in msi_initialization_dict:
            try:
                debug("invoking model init")
                msis[msi].model_init(copy.deepcopy(policy),
                                     copy.deepcopy(model_kwargs))
            except (EMAError, NotImplementedError) as inst:
                exception(inst)
                cleanup(model_interfaces)
                # cleanup tore down every model, nothing is initialized now
                msi_initialization_dict = {}
                put((job, i, (False, inst)))
                # do not run (and report a second result for) this task
                continue
            except Exception:
                exception("some exception occurred when invoking the init")
                cleanup(model_interfaces)
                msi_initialization_dict = {}
                put((job, i, (False, EMAParallelError("failure to initialize"))))
                continue

            debug("initialized model %s with policy %s" % (msi, policy["name"]))

            # always, only a single initialized msi instance
            msi_initialization_dict = {(policy["name"], msi): msis[msi]}
        msi = msis[msi]

        case = copy.deepcopy(experiment)
        try:
            debug("trying to run model")
            msi.run_model(case)
        except CaseError as e:
            warning(str(e))

        debug("trying to retrieve output")
        result = msi.retrieve_output()

        result = (True, (case, policy, msi.name, result))

        debug("trying to reset model")
        msi.reset_model()

        put((job, i, result))
def _handle_tasks(taskqueue, put, outqueue, pool):
    """
    Thread target that forwards tasks from `taskqueue` via `put`.

    Every item taken from `taskqueue` is a ``(taskseq, set_length)`` pair;
    the tasks of the sequence are passed to `put` one by one. A ``None``
    item on `taskqueue` is the sentinel that ends the loop. On the way out,
    ``None`` sentinels are sent to `outqueue` and, via `put`, to the
    workers.

    :param taskqueue: queue of ``(taskseq, set_length)`` pairs
    :param put: callable used to hand a single task (or sentinel) on
    :param outqueue: queue that receives the sentinel for the result handler
    :param pool: the workers; only its length is used
    """
    thread = threading.current_thread()

    for taskseq, set_length in iter(taskqueue.get, None):
        # i stays -1 when taskseq is empty, so set_length is called with 0
        i = -1
        for i, task in enumerate(taskseq):
            # a truthy _state means the thread was asked to stop
            if thread._state:
                debug("task handler found thread._state != RUN")
                break
            try:
                put(task)
            except IOError:
                debug("could not put task on queue")
                break
        else:
            # the whole sequence was forwarded without interruption
            if set_length:
                debug("doing set_length()")
                set_length(i + 1)
            continue
        # only reached when the inner loop was left through a break
        break
    else:
        debug("task handler got sentinel")

    try:
        # tell result handler to finish when cache is empty
        debug("task handler sending sentinel to result handler")
        outqueue.put(None)

        # tell workers there is no more work
        debug("task handler sending sentinel to workers")
        # two sentinels are sent per worker
        for i in range(2 * len(pool)):
            put(None)
    except IOError:
        debug("task handler got IOError when sending sentinels")

    debug("task handler exiting")
def _handle_results(outqueue, get, cache, log_queue):
    """
    Thread target that stores incoming results in the job cache.

    Every item obtained from `get` is a ``(job, i, obj)`` triple that is
    passed to ``cache[job]._set(i, obj)``; results for jobs that are not in
    the cache are dropped. A ``None`` item is the sentinel that ends the
    first loop, after which the remaining results are collected until the
    cache is empty or the thread is set to TERMINATE. Before a normal exit,
    ``None`` is put on `log_queue`.

    :param outqueue: the queue the results arrive on; only used to make
                     room for sentinels when it exposes ``_reader``
    :param get: callable returning the next result (or sentinel)
    :param cache: mapping from job to an object offering ``_set(i, obj)``
    :param log_queue: queue that receives ``None`` when the handler exits
    """
    thread = threading.current_thread()

    while 1:
        try:
            task = get()
        except (IOError, EOFError):
            debug("result handler got EOFError/IOError -- exiting")
            # NOTE(review): this early return skips log_queue.put(None) at
            # the end of the function -- confirm that is intended.
            return

        if thread._state:
            assert thread._state == TERMINATE
            debug("result handler found thread._state=TERMINATE")
            break

        if task is None:
            debug("result handler got sentinel")
            break

        job, i, obj = task
        try:
            cache[job]._set(i, obj)
        except KeyError:
            # the job is no longer in the cache, drop the result
            pass

    # keep collecting until every job still in the cache has been served
    while cache and thread._state != TERMINATE:
        try:
            task = get()
        except (IOError, EOFError):
            debug("result handler got EOFError/IOError -- exiting")
            return

        if task is None:
            debug("result handler ignoring extra sentinel")
            continue
        job, i, obj = task
        try:
            cache[job]._set(i, obj)
        except KeyError:
            pass

    if hasattr(outqueue, "_reader"):
        debug("ensuring that outqueue is not full")
        # If we don't make room available in outqueue then
        # attempts to add the sentinel (None) to outqueue may
        # block. There is guaranteed to be no more than 2 sentinels.
        try:
            for i in range(10):
                if not outqueue._reader.poll():
                    break
                get()
        except (IOError, EOFError):
            pass

    debug("result handler exiting: len(cache)=%s, thread._state=%s", len(cache), thread._state)

    # presumably the sentinel that stops the log queue reader -- verify
    # against the reader's run loop
    log_queue.put(None)
def __init__(self, msis, processes=None, kwargs=None):
    """
    Set up the queues, the worker processes and the handler threads.

    For every worker a copy of the working directory of each model
    structure interface is made (skipping the content of ``.svn``
    directories), and the interface is pointed at that copy before the
    worker process is started.

    :param msis: an iterable of model structure interfaces
    :param processes: nr. of processes to spawn, if none, it is
                      set to equal the nr. of cores
    :param kwargs: kwargs to be passed to :meth:`model_init`
    """
    self._setup_queues()
    # NOTE(review): cpu_count() is called here outside the try below, so a
    # NotImplementedError raised by it is not handled at this point.
    self._taskqueue = Queue(cpu_count() * 2)
    self._cache = {}
    self._state = RUN

    if processes is None:
        try:
            processes = cpu_count()
        except NotImplementedError:
            processes = 1
    info("nr of processes is " + str(processes))

    self.log_queue = Queue()
    h = NullHandler()
    logging.getLogger(ema_logging.LOGGER_NAME).addHandler(h)

    # This thread will read from the subprocesses and write to the
    # main log's handlers.
    log_queue_reader = LogQueueReader(self.log_queue)
    log_queue_reader.start()

    self._pool = []
    working_dirs = []

    # msis = [copy.deepcopy(msi) for msi in msis]

    debug("generating workers")

    workerRoot = None
    for i in range(processes):
        debug("generating worker " + str(i))

        workerName = "PoolWorker" + str(i)

        # passed to copytree as ignore callback: within a path containing
        # ".svn" everything is ignored, elsewhere nothing is
        def ignore_function(path, names):
            if path.find(".svn") != -1:
                return names
            else:
                return []

        # setup working directories for parallelEMA
        for msi in msis:
            if msi.workingDirectory != None:
                # the directory that contains the working directory of the
                # first msi becomes the root for all worker directories
                if workerRoot == None:
                    workerRoot = os.path.dirname(os.path.abspath(msis[0].workingDirectory))

                workingDirectory = os.path.join(workerRoot, workerName, msi.name)
                working_dirs.append(workingDirectory)
                shutil.copytree(msi.workingDirectory, workingDirectory, ignore=ignore_function)
                msi.set_working_directory(workingDirectory)

        w = LoggingProcess(
            self.log_queue,
            level=logging.getLogger(ema_logging.LOGGER_NAME).getEffectiveLevel(),
            target=worker,
            args=(self._inqueue, self._outqueue, msis, kwargs),
        )
        self._pool.append(w)

        w.name = w.name.replace("Process", workerName)
        w.daemon = True
        w.start()
        debug(" worker " + str(i) + " generated")

    # thread for handling tasks
    self._task_handler = threading.Thread(
        target=CalculatorPool._handle_tasks,
        name="task handler",
        args=(self._taskqueue, self._quick_put, self._outqueue, self._pool),
    )
    self._task_handler.daemon = True
    self._task_handler._state = RUN
    self._task_handler.start()

    # thread for handling results
    self._result_handler = threading.Thread(
        target=CalculatorPool._handle_results,
        name="result handler",
        args=(self._outqueue, self._quick_get, self._cache, self.log_queue),
    )
    self._result_handler.daemon = True
    self._result_handler._state = RUN
    self._result_handler.start()

    # function for cleaning up when finalizing object
    self._terminate = Finalize(
        self,
        self._terminate_pool,
        args=(
            self._taskqueue,
            self._inqueue,
            self._outqueue,
            self._pool,
            self._task_handler,
            self._result_handler,
            self._cache,
            working_dirs,
        ),
        exitpriority=15,
    )

    info("pool has been set up")
def run_model(self, case):
    """
    Method for running an instantiated model structure.
    The provided implementation assumes that the keys in the
    case match the variable names in the Vensim model.

    If lookups are to be set, specify their transformation from
    uncertainties to lookup values in the extension of this method,
    then call this one using super with the updated case dict.

    If you want to use cinFiles, set the cinFile, or cinFiles in
    the extension of this method to `self.cinFile`.

    The retrieved data is stored in `self.output`, keyed by outcome name.
    Outcomes flagged as time series keep every `self.step`-th value, other
    outcomes keep only their last value.

    :param case: the case to run
    :raises: CaseError if the data of an outcome does not have
             `self.runLength` entries; the data is padded with zeros and
             still stored in `self.output`.

    .. note:: setting parameters should always be done via run_model.
              The model is reset to its initial values automatically after
              each run.

    """
    if self.cinFile:
        try:
            read_cin_file(self.workingDirectory+self.cinFile)
        except VensimWarning as w:
            debug(str(w))
        else:
            debug("cin file read successfully")

    for key, value in case.items():
        set_value(key, value)
    debug("model parameters set successfully")

    result_file = self.workingDirectory+self.resultFile
    debug("run simulation, results stored in " + result_file)
    run_simulation(result_file)

    results = {}
    error = False
    for output in self.outcomes:
        debug("getting data for %s" %output.name)
        result = get_data(result_file, output.name)
        debug("successfully retrieved data for %s" %output.name)

        if not result == []:
            if result.shape[0] != self.runLength:
                # incomplete run: pad with zeros and report it afterwards
                got = result.shape[0]
                a = np.zeros((self.runLength))
                a[0:result.shape[0]] = result
                result = a
                error = True

        if output.time:
            result = result[0::self.step]
        elif len(result) > 0:
            # not a time series: keep the end state of the run (the data
            # used to be replaced by the constant list [-1])
            result = result[-1]
        results[output.name] = result

    self.output = results
    if error:
        raise CaseError("run not completed, got %s, expected %s" %
                        (got, self.runLength), case)
def __init__(self, msis, processes=None, kwargs=None):
    '''
    Set up the queues, the worker processes and the handler threads.

    For every worker the working directory of each model structure
    interface is copied to a directory named after the worker, and the
    interface is pointed at that copy before the worker process is started.

    :param msis: an iterable of model structure interfaces
    :param processes: nr. of processes to spawn, if none, it is
                      set to equal the nr. of cores
    :param kwargs: kwargs to be passed to :meth:`model_init`
    '''
    self._setup_queues()
    # NOTE(review): cpu_count() is called here outside the try below, so a
    # NotImplementedError raised by it is not handled at this point.
    self._taskqueue = Queue.Queue(cpu_count()*2)
    self._cache = {}
    self._state = RUN

    if processes is None:
        try:
            processes = cpu_count()
        except NotImplementedError:
            processes = 1
    info("nr of processes is "+str(processes))

    self.log_queue = multiprocessing.Queue()
    h = NullHandler()
    logging.getLogger(ema_logging.LOGGER_NAME).addHandler(h)

    # This thread will read from the subprocesses and write to the
    # main log's handlers.
    log_queue_reader = LogQueueReader(self.log_queue)
    log_queue_reader.start()

    self._pool = []
    working_dirs = []

    debug('generating workers')

    worker_root = None
    for i in range(processes):
        debug('generating worker '+str(i))

        # generate a random string helps in running repeatedly with
        # crashes
        choice_set = string.ascii_uppercase + string.digits + string.ascii_lowercase
        random_string = ''.join(random.choice(choice_set) for _ in range(5))

        workername = 'tpm_{}_PoolWorker_{}'.format(random_string, i)

        #setup working directories for parallel_ema
        for msi in msis:
            if msi.working_directory != None:
                # the directory that contains the working directory of the
                # first msi becomes the root for all worker directories
                if worker_root == None:
                    worker_root = os.path.dirname(os.path.abspath(msis[0].working_directory))

                # NOTE(review): the target does not include msi.name, so
                # every msi of a worker is copied to the same directory --
                # confirm copytree accepts that with more than one msi.
                working_directory = os.path.join(worker_root, workername)

#                 working_directory = tempfile.mkdtemp(suffix=workername,
#                                                      prefix='tmp_',
#                                                      dir=worker_root)

                working_dirs.append(working_directory)
                shutil.copytree(msi.working_directory,
                                working_directory,
                                )
                msi.set_working_directory(working_directory)

        w = LoggingProcess(
            self.log_queue,
            level = logging.getLogger(ema_logging.LOGGER_NAME)\
                                .getEffectiveLevel(),
            target=worker,
            args=(self._inqueue, self._outqueue, msis, kwargs )
        )
        self._pool.append(w)

        w.name = w.name.replace('Process', workername)
        w.daemon = True
        w.start()
        debug(' worker '+str(i) + ' generated')

    # thread for handling tasks
    self._task_handler = threading.Thread(
        target=CalculatorPool._handle_tasks,
        name='task handler',
        args=(self._taskqueue, self._quick_put, self._outqueue, self._pool )
    )
    self._task_handler.daemon = True
    self._task_handler._state = RUN
    self._task_handler.start()

    # thread for handling results
    self._result_handler = threading.Thread(
        target=CalculatorPool._handle_results,
        name='result handler',
        args=(self._outqueue, self._quick_get, self._cache, self.log_queue)
    )
    self._result_handler.daemon = True
    self._result_handler._state = RUN
    self._result_handler.start()

    # function for cleaning up when finalizing object
    self._terminate = Finalize(self,
                               self._terminate_pool,
                               args=(self._taskqueue,
                                     self._inqueue,
                                     self._outqueue,
                                     self._pool,
                                     self._task_handler,
                                     self._result_handler,
                                     self._cache,
                                     working_dirs,
                                     ),
                               exitpriority=15
                               )

    info("pool has been set up")
def merge_results(results1, results2, downsample=None):
    '''
    Convenience function for merging the return from
    :meth:`~modelEnsemble.ModelEnsemble.perform_experiments`.

    The function merges results2 with results1. For the experiments,
    it generates an empty array equal to the size of the sum of the
    experiments. As dtype it uses the dtype from the experiments in
    results1. The function assumes that the ordering of dtypes and names is
    identical in both results.

    Only outcomes that are present in both results are merged. The merged
    outcome arrays are two-dimensional: rows are experiments, with the rows
    of results1 first.

    A typical use case for this function is in combination with
    :func:`~util.experiments_to_cases`. Using
    :func:`~util.experiments_to_cases` one extracts the cases from a first
    set of experiments. One then performs these cases on a different model
    or policy, and then one wants to merge these new results with the old
    result for further analysis.

    :param results1: first results to be merged
    :param results2: second results to be merged
    :param downsample: should be an integer, will be used in slicing the
                       results in order to avoid memory problems.
    :return: the merged results

    '''
    #start of merging
    old_exp, old_res = results1
    new_exp, new_res = results2

    #merge experiments
    dtypes = old_exp.dtype
    nr_old = old_exp.shape[0]

    merged_exp = np.empty((nr_old+new_exp.shape[0],), dtype=dtypes)
    merged_exp[0:nr_old] = old_exp
    merged_exp[nr_old::] = new_exp

    #only merge the results that are in both
    keys = set(old_res.keys()) & set(new_res.keys())
    info("intersection of keys: %s" % keys)

    slice_value = downsample if downsample else 1

    #merging results
    merged_res = {}
    for key in keys:
        info("merge "+key)

        # slice first and take the shape from the sliced data, so the
        # column count always matches what is going to be stored
        old_value = old_res[key][:, ::slice_value]
        new_value = new_res[key][:, ::slice_value]

        i = old_value.shape[0]+new_value.shape[0]
        j = old_value.shape[1]

        merged_value = np.empty((i, j))
        # the shape is a tuple, wrap it so it is formatted as one value
        debug("merged shape: %s" % (merged_value.shape,))

        merged_value[0:old_value.shape[0], :] = old_value
        merged_value[old_value.shape[0]::, :] = new_value

        merged_res[key] = merged_value

    mr = (merged_exp, merged_res)
    return mr
def _handle_tasks(taskqueue, put, outqueue, pool):
    """
    Thread target that forwards tasks from `taskqueue` via `put`.

    Tasks are forwarded one at a time until the ``None`` sentinel is taken
    from `taskqueue`, the thread's ``_state`` becomes truthy, or `put`
    raises ``IOError``. On the way out, ``None`` sentinels are sent to
    `outqueue` and, two per worker, via `put`.

    :param taskqueue: queue of tasks, terminated by ``None``
    :param put: callable used to hand a single task (or sentinel) on
    :param outqueue: queue that receives the sentinel for the result handler
    :param pool: the workers; only its length is used
    """
    current = threading.current_thread()

    for task in iter(taskqueue.get, None):
        if current._state:
            debug('task handler found thread._state != RUN')
            break

        try:
            put(task)
        except IOError:
            debug('could not put task on queue')
            break
    else:
        # the loop ended because the sentinel was read, not through a break
        debug('task handler got sentinel')

    try:
        # tell result handler to finish when cache is empty
        debug('task handler sending sentinel to result handler')
        outqueue.put(None)

        # tell workers there is no more work
        debug('task handler sending sentinel to workers')
        nr_sentinels = 2*len(pool)
        for _ in range(nr_sentinels):
            put(None)
    except IOError:
        debug('task handler got IOError when sending sentinels')

    debug('task handler exiting')
def run(self):
    """
    Set up logging for this process, log that it started and then run the
    process as usual.
    """
    self._setupLogger()

    process = current_process()
    started = "process %s with pid %s started" % (process.name, process.pid)
    debug(started)

    # call the run of the super, which in turn will call the worker function
    super(LoggingProcess, self).run()
def __init__(self, msis, processes=None, kwargs=None):
    '''
    Set up the queues, the worker processes and the handler threads.

    For every worker the working directory of each model structure
    interface is copied to a directory named after the worker and the
    interface, and the interface is pointed at that copy before the worker
    process is started.

    :param msis: an iterable of model structure interfaces
    :param processes: nr. of processes to spawn, if none, it is
                      set to equal the nr. of cores
    :param kwargs: kwargs to be passed to :meth:`model_init`
    '''
    if processes is None:
        try:
            processes = cpu_count()
        except NotImplementedError:
            processes = 1
    info("nr of processes is "+str(processes))

    # setup queues etc.
    self._setup_queues()
    # the task queue holds at most two tasks per process
    self._taskqueue = Queue.Queue(processes*2)
    self._cache = {}
    self._state = RUN

    # handling of logging
    self.log_queue = multiprocessing.Queue()
    h = NullHandler()
    logging.getLogger(ema_logging.LOGGER_NAME).addHandler(h)

    # started before the workers; it is handed the queue that is also
    # passed to every LoggingProcess below
    log_queue_reader = LogQueueReader(self.log_queue)
    log_queue_reader.start()

    # setup of the actual pool
    self._pool = []
    working_dirs = []

    debug('generating workers')

    worker_root = None
    for i in range(processes):
        debug('generating worker '+str(i))

        workername = self._get_worker_name(i)

        #setup working directories for parallel_ema
        for msi in msis:
            if msi.working_directory != None:
                # the directory that contains the working directory of the
                # first msi becomes the root for all worker directories
                if worker_root == None:
                    wd = msis[0].working_directory
                    abs_wd = os.path.abspath(wd)
                    worker_root = os.path.dirname(abs_wd)

                # one directory per worker and msi
                wd_name = workername + msi.name
                working_directory = os.path.join(worker_root, wd_name)

#                 working_directory = tempfile.mkdtemp(suffix=workername,
#                                                      prefix='tmp_',
#                                                      dir=worker_root)

                working_dirs.append(working_directory)
                shutil.copytree(msi.working_directory,
                                working_directory,
                                )
                msi.set_working_directory(working_directory)

        w = LoggingProcess(
            self.log_queue,
            level = logging.getLogger(ema_logging.LOGGER_NAME)\
                                .getEffectiveLevel(),
            target=worker,
            args=(self._inqueue, self._outqueue, msis, kwargs )
        )
        self._pool.append(w)

        w.name = w.name.replace('Process', workername)
        w.daemon = True
        w.start()
        debug(' worker '+str(i) + ' generated')

    # thread for handling tasks
    self._task_handler = threading.Thread(
        target=CalculatorPool._handle_tasks,
        name='task handler',
        args=(self._taskqueue, self._quick_put, self._outqueue, self._pool )
    )
    self._task_handler.daemon = True
    self._task_handler._state = RUN
    self._task_handler.start()

    # thread for handling results
    self._result_handler = threading.Thread(
        target=CalculatorPool._handle_results,
        name='result handler',
        args=(self._outqueue, self._quick_get, self._cache, self.log_queue)
    )
    self._result_handler.daemon = True
    self._result_handler._state = RUN
    self._result_handler.start()

    # function for cleaning up when finalizing object
    self._terminate = Finalize(self,
                               self._terminate_pool,
                               args=(self._taskqueue,
                                     self._inqueue,
                                     self._outqueue,
                                     self._pool,
                                     self._task_handler,
                                     self._result_handler,
                                     self._cache,
                                     working_dirs,
                                     ),
                               exitpriority=15
                               )

    info("pool has been set up")