def __init__(self, working_directory, name):
    """interface to the model

    Parameters
    ----------
    working_directory : str
        working_directory for the model.
    name : str
        name of the modelInterface. The name should contain only
        alpha-numerical characters.

    Raises
    ------
    EMAError
        if name contains non alpha-numerical characters

    .. note:: Anything that is relative to `self.working_directory`
              should be specified in `model_init` and not in `__init__`.
              Otherwise, the code will not work when running it in
              parallel. The reason for this is that the working directory
              is being updated by parallelEMA to the worker's separate
              working directory prior to calling `model_init`.

    """
    super(VensimModelStructureInterface, self).__init__(working_directory,
                                                        name)
    self.outcomes.append(Outcome('TIME', time=True))
    self.outcomes = list(self.outcomes)
    self._lookup_uncertainties = []
    debug("vensim interface init completed")

def run(self): """ read from the queue and write to the log handlers The logging documentation says logging is thread safe, so there shouldn't be contention between normal logging (from the main process) and this thread. Note that we're using the name of the original logger. """ while True: try: record = self.queue.get() # get the logger for this record if record is None: ema_logging.debug("none received") break logger = logging.getLogger(record.name) logger.callHandlers(record) except (KeyboardInterrupt, SystemExit): raise except EOFError: break except: traceback.print_exc(file=sys.stderr)
def _store_result(self, case_id, result):
    for outcome in self.outcomes:
        ema_logging.debug("storing {}".format(outcome))

        try:
            outcome_res = result[outcome]
        except KeyError:
            ema_logging.debug("%s not in msi" % outcome)
        else:
            try:
                self.results[outcome][case_id, ] = outcome_res
                self.results[outcome].flush()
            except KeyError:
                data = np.asarray(outcome_res)

                shape = data.shape
                if len(shape) > 2:
                    raise ema_exceptions.EMAError(
                        self.shape_error_msg.format(len(shape)))

                shape = list(shape)
                shape.insert(0, self.nr_experiments)
                shape = tuple(shape)

                fh = tempfile.TemporaryFile()
                self.results[outcome] = np.memmap(fh,
                                                  dtype=data.dtype,
                                                  shape=shape)
                self.results[outcome][:] = np.NAN
                self.results[outcome][case_id, ] = data
                self.results[outcome].flush()

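# A small, runnable sketch of the lazy memmap allocation used above: the
# array backing an outcome is only created once the first result arrives,
# sized for all experiments, and prefilled with NaN. The outcome name and
# sizes are made up for illustration.
import tempfile
import numpy as np

nr_experiments = 5
results = {}

def store(case_id, outcome, value):
    data = np.asarray(value)
    try:
        results[outcome][case_id, ] = data
    except KeyError:
        # first result for this outcome: allocate a disk-backed array
        shape = (nr_experiments,) + data.shape
        fh = tempfile.TemporaryFile()
        results[outcome] = np.memmap(fh, dtype=data.dtype, shape=shape)
        results[outcome][:] = np.nan
        results[outcome][case_id, ] = data
    results[outcome].flush()

store(0, 'TIME', np.arange(10, dtype=float))
store(2, 'TIME', np.arange(10, dtype=float) * 2)
print(results['TIME'][1])  # untouched rows stay NaN
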
def model_init(self, policy, kwargs):
    '''
    Method called to initialize the model.

    Parameters
    ----------
    policy : dict
        policy to be run.
    kwargs : dict
        keyword arguments to be used by model_init. This gives users
        the ability to pass any additional arguments.

    '''
    if not self.xl:
        try:
            ema_logging.debug("trying to start Excel")
            self.xl = win32com.client.Dispatch("Excel.Application")
            ema_logging.debug("Excel started")
        except com_error as e:
            raise EMAError(str(e))
    ema_logging.debug("trying to open workbook")
    self.wb = self.xl.Workbooks.Open(self.working_directory + self.workbook)
    ema_logging.debug("workbook opened")
    ema_logging.debug(self.working_directory)

def _store_result(self, case_id, result):
    for outcome in self.outcomes:
        ema_logging.debug("storing {}".format(outcome))

        try:
            outcome_res = result[outcome]
        except KeyError:
            ema_logging.debug("%s not specified as outcome in msi" % outcome)
        else:
            try:
                self.results[outcome][case_id, ] = outcome_res
            except KeyError:
                shape = np.asarray(outcome_res).shape

                if len(shape) > 2:
                    raise ema_exceptions.EMAError(
                        self.shape_error_msg.format(len(shape)))

                shape = list(shape)
                shape.insert(0, self.nr_experiments)

                self.results[outcome] = np.empty(shape)
                self.results[outcome][:] = np.NAN
                self.results[outcome][case_id, ] = outcome_res

def run_simulation(file_name):
    '''
    Convenience function to run a model and store the results of the run
    in the specified .vdf file. The specified output file will be
    overwritten by default.

    Parameters
    ----------
    file_name : str
        the file name of the output file relative to the working directory

    Raises
    ------
    VensimError if running the model failed in some way.

    '''
    file_name = str(file_name)

    try:
        debug(" executing COMMAND: SIMULATE>RUNNAME|" + file_name + "|O")
        command("SIMULATE>RUNNAME|" + file_name + "|O")
        debug(r"MENU>RUN|o")
        command(r"MENU>RUN|o")
    except VensimWarning as w:
        warning(str(w))
        raise VensimError(str(w))

def _first_get_population(self):
    ''' called only once, to initialize the population and the associated
    statistics; returns the initial population. After the first call,
    `_get_population` is used instead. '''
    ema_logging.debug("Start of evolution")

    new_pop = self.toolbox.population(self.pop_size)

    # Evaluate the entire population
    self.evaluate_population(new_pop, self.reporting_interval,
                             self.toolbox, self.ensemble)
    self.pop = new_pop

    # This is just to assign the crowding distance to the individuals
    tools.emo.assignCrowdingDist(self.pop)

    self.stats_callback(self.pop)
    self.stats_callback.log_stats(self.called)

    if self.caching:
        self._update_cache(self.pop)

    self.get_population = self._get_population

def cleanup(self):
    ''' clean up prior to finishing the experiments. This will close the
    workbook and quit Excel. '''
    ema_logging.debug("cleaning up")
    if self.wb:
        self.wb.Close(False)
        del self.wb
    if self.xl:
        self.xl.DisplayAlerts = False
        self.xl.Quit()
        del self.xl

    self.xl = None
    self.wb = None

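# A hedged sketch of the Excel COM lifecycle that model_init and cleanup
# above implement, assuming Windows with pywin32 installed. The workbook
# path is a placeholder; Dispatch and Workbooks.Open are the same pywin32
# COM calls used above.
import win32com.client
from pywintypes import com_error

xl = None
wb = None
try:
    xl = win32com.client.Dispatch("Excel.Application")
    wb = xl.Workbooks.Open(r"C:\models\example.xlsx")  # placeholder path
    # ... set input cells / read output cells here ...
except com_error as e:
    raise RuntimeError(str(e))
finally:
    if wb is not None:
        wb.Close(False)        # close without saving
    if xl is not None:
        xl.DisplayAlerts = False
        xl.Quit()
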
def set_engine_logger():
    '''Updates EMA logging on the engines with an EngineLoggerAdapter.
    This adapter injects EMA as a topic into all messages.

    '''
    logger = Application.instance().log
    logger.setLevel(ema_logging.DEBUG)

    for handler in logger.handlers:
        if isinstance(handler, IPython.kernel.zmq.log.EnginePUBHandler):  # @UndefinedVariable
            handler.setLevel(ema_logging.DEBUG)

    adapter = EngingeLoggerAdapter(logger, SUBTOPIC)
    ema_logging._logger = adapter

    ema_logging.debug('updated logger')

def set_working_directory(self, wd):
    ''' Method for setting the working directory of the model interface.
    This method is used in case of running models in parallel. In this
    case, each worker process will have its own working directory, to
    avoid having to share files across processes. The working directory
    therefore needs to be updated to the worker's own directory.

    Parameters
    ----------
    wd : str
        The new working directory.

    '''
    wd = os.path.abspath(wd)
    debug('setting working directory to ' + wd)
    self._working_directory = wd

def read_cin_file(file_name):
    '''
    read a .cin file

    Parameters
    ----------
    file_name : str
        file name of cin file, relative to working directory

    Raises
    ------
    VensimWarning if the cin file cannot be read.

    '''
    debug("executing COMMAND: SIMULATE>READCIN|" + file_name)
    try:
        command(r"SIMULATE>READCIN|" + str(file_name))
    except VensimWarning as w:
        debug(str(w))
        raise w

def experiment_generator(designs, model_structures, policies):
    '''
    generator function which yields experiments

    Parameters
    ----------
    designs : iterable of dicts
    model_structures : list
    policies : list

    Notes
    -----
    this generator is essentially three nested loops: for each model
    structure, for each policy, for each experiment, run the experiment.
    This means that designs should not be a generator, because it would
    be exhausted after running the first policy on the first model.

    '''
    job_counter = itertools.count()

    for msi in model_structures:
        debug("generating designs for model %s" % (msi.name))
        msi_uncs = {unc.name for unc in msi.uncertainties}

        for policy in policies:
            debug("generating designs for policy %s" % (policy['name']))

            for design in designs:
                # from the design only get the uncertainties that
                # are valid for the current msi
                keys = set(design.keys()).intersection(msi_uncs)
                experiment = {unc: design[unc] for unc in keys}

                # complete the design by adding the policy, model name
                # and experiment id to it
                experiment['policy'] = policy
                experiment['model'] = msi.name
                experiment['experiment id'] = six.next(job_counter)
                yield experiment

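# A runnable toy illustration of experiment_generator, assuming the
# module-level imports it relies on (six, debug, itertools). The Unc/Msi
# namedtuples are stand-ins for real uncertainty and model structure
# objects; note that designs is a list, not a generator, so it survives
# being iterated once per model/policy combination.
from collections import namedtuple

Unc = namedtuple('Unc', 'name')
Msi = namedtuple('Msi', 'name uncertainties')

designs = [{'a': 1, 'b': 2}, {'a': 3, 'b': 4}]
msis = [Msi('model 1', [Unc('a')]), Msi('model 2', [Unc('a'), Unc('b')])]
policies = [{'name': 'no policy'}, {'name': 'adaptive policy'}]

for experiment in experiment_generator(designs, msis, policies):
    print(experiment['experiment id'], experiment['model'],
          experiment['policy']['name'])
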
def model_init(self, policy, kwargs):
    '''
    Method called to initialize the model.

    Parameters
    ----------
    policy : dict
        policy to be run.
    kwargs : dict
        keyword arguments to be used by model_init. This gives users
        the ability to pass any additional arguments.

    '''
    self.policy = policy

    self.netlogo = pyNetLogo.NetLogoLink()
    debug("netlogo started")
    path = self.working_directory + self.model_file
    self.netlogo.load_model(path)
    debug("model opened")

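# A short usage sketch of the NetLogo link outside a model interface. The
# constructor and load_model calls are taken from the code in this module;
# command/report follow pyNetLogo's public API and may differ in this
# version, and the model path is a placeholder.
import pyNetLogo

netlogo = pyNetLogo.NetLogoLink()            # headless, 2d by default
netlogo.load_model('/models/Fire.nlogo')     # placeholder path
netlogo.command('setup')                     # run a NetLogo command
result = netlogo.report('count turtles')     # evaluate a reporter
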
def __init__(self, gui=False, thd=False):
    '''
    Create a link with netlogo. Underneath, the netlogo jvm is started
    through jpype.

    :param gui: boolean, if true run netlogo with gui, otherwise run in
                headless mode. Defaults to false.
    :param thd: boolean, if true start netlogo in 3d mode. Defaults to
                false.

    '''
    if not jpype.isJVMStarted():
        # netlogo jars
        jars = [NETLOGO_HOME + r'/lib/scala-library.jar',
                NETLOGO_HOME + r'/lib/asm-all-3.3.1.jar',
                NETLOGO_HOME + r'/lib/picocontainer-2.13.6.jar',
                NETLOGO_HOME + r'/lib/log4j-1.2.16.jar',
                NETLOGO_HOME + r'/lib/jmf-2.1.1e.jar',
                NETLOGO_HOME + r'/lib/pegdown-1.1.0.jar',
                NETLOGO_HOME + r'/lib/parboiled-core-1.0.2.jar',
                NETLOGO_HOME + r'/lib/parboiled-java-1.0.2.jar',
                NETLOGO_HOME + r'/lib/mrjadapter-1.2.jar',
                NETLOGO_HOME + r'/lib/jhotdraw-6.0b1.jar',
                NETLOGO_HOME + r'/lib/quaqua-7.3.4.jar',
                NETLOGO_HOME + r'/lib/swing-layout-7.3.4.jar',
                NETLOGO_HOME + r'/lib/jogl-1.1.1.jar',
                NETLOGO_HOME + r'/lib/gluegen-rt-1.1.1.jar',
                NETLOGO_HOME + r'/NetLogo.jar',
                PYNETLOGO_HOME + r'/external_files/netlogoLink.jar']

        # format jars in right format for starting java virtual machine
        # TODO the use of the jre here is only relevant under windows
        # apparently
        # might be solvable by setting netlogo home user.dir
        joined_jars = jar_separator.join(jars)
        jarpath = '-Djava.class.path={}'.format(joined_jars)

        jvm_handle = jpype.getDefaultJVMPath()
        jpype.startJVM(jvm_handle, jarpath, "-Xms128M", "-Xmx1024m")
        jpype.java.lang.System.setProperty('user.dir', NETLOGO_HOME)

        if sys.platform == 'darwin':
            jpype.java.lang.System.setProperty("java.awt.headless", "true")

        debug("jvm started")

    link = jpype.JClass('netlogoLink.NetLogoLink')
    debug('NetLogoLink class found')

    if sys.platform == 'darwin' and gui:
        info('on mac only headless mode is supported')
        gui = False

    self.link = link(gui, thd)
    debug('NetLogoLink class instantiated')

def load_model(file_name):
    '''
    load the model

    Parameters
    ----------
    file_name : str
        file name of model, relative to working directory

    Raises
    ------
    VensimError if the model cannot be loaded.

    .. note:: only works for .vpm files

    '''
    debug("executing COMMAND: SIMULATE>SPECIAL>LOADMODEL|" + file_name)
    try:
        command("SPECIAL>LOADMODEL|" + str(file_name))
    except VensimWarning as w:
        warning(str(w))
        raise VensimError("vensim file not found")

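# The typical call sequence for the thin Vensim DLL wrappers defined in
# this module: load a published model, optionally apply a constants (.cin)
# file, then run a simulation. The file names are placeholders; all three
# helpers raise on failure as documented above.
load_model('flu.vpm')              # placeholder .vpm file
try:
    read_cin_file('policy.cin')    # placeholder .cin file, optional
except VensimWarning as w:
    debug(str(w))                  # a missing cin file is not fatal
run_simulation('run1.vdf')         # results stored in run1.vdf
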
def test_log_messages(self):
    ema_logging.log_to_stderr(ema_logging.DEBUG)

    with mock.patch('util.ema_logging._logger') as mocked_logger:
        message = 'test message'

        ema_logging.debug(message)
        mocked_logger.debug.assert_called_with(message)

        ema_logging.info(message)
        mocked_logger.info.assert_called_with(message)

        ema_logging.warning(message)
        mocked_logger.warning.assert_called_with(message)

        ema_logging.error(message)
        mocked_logger.error.assert_called_with(message)

        ema_logging.exception(message)
        mocked_logger.exception.assert_called_with(message)

        ema_logging.critical(message)
        mocked_logger.critical.assert_called_with(message)

def model_init(self, policy, kwargs):
    """
    Init of the model. The provided implementation here assumes that
    `self.model_file` is set correctly. In case of using different vensim
    models for different policies, it is recommended to extend this
    method, extract the model file from the policy dict, set
    `self.model_file` to this file, and then call this implementation
    through calling `super`.

    Parameters
    ----------
    policy : dict
        policy to be run.
    kwargs : dict
        keyword arguments to be used by model_init. This gives users
        the ability to pass any additional arguments.

    """
    load_model(self.working_directory + self.model_file)  # load the model
    debug("model initialized successfully")

    be_quiet()  # minimize the screens that are shown

    try:
        initialTime = get_val('INITIAL TIME')
        finalTime = get_val('FINAL TIME')
        timeStep = get_val('TIME STEP')
        savePer = get_val('SAVEPER')

        if savePer > 0:
            timeStep = savePer

        self.run_length = int((finalTime - initialTime) / timeStep + 1)
    except VensimWarning as w:
        raise EMAWarning(str(w))

def worker(inqueue, outqueue, model_interfaces, model_kwargs=None):
    #
    # Code run by worker processes
    #
    ema_logging.debug("worker started")

    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()

    msis = {msi.name: msi for msi in model_interfaces}
    runner = ExperimentRunner(msis, model_kwargs)

    while True:
        try:
            task = inqueue.get()
        except (EOFError, IOError):
            ema_logging.debug('worker got EOFError or IOError -- exiting')
            break
        if task is None:
            ema_logging.debug('worker got sentinel -- exiting')
            break

        _, experiment = task
        experiment_id = experiment.get('experiment id')

        try:
            result = runner.run_experiment(experiment)
            outqueue.put((experiment_id, (True, result)))
        except EMAError as inst:
            result = (False, inst)
            outqueue.put((experiment_id, result))
        except Exception:
            result = (False, EMAParallelError("failure to initialize"))
            outqueue.put((experiment_id, result))

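# A stripped-down, runnable sketch of the task/result queue protocol the
# worker above implements: tasks come in on one queue, (id, (success,
# payload)) tuples go out on another, and None acts as the shutdown
# sentinel. The echo "experiment" is a stand-in for run_experiment.
import multiprocessing

def toy_worker(inqueue, outqueue):
    while True:
        task = inqueue.get()
        if task is None:          # sentinel -- exit
            break
        job_id, experiment = task
        outqueue.put((job_id, (True, experiment)))  # echo back as "result"

if __name__ == '__main__':
    inq, outq = multiprocessing.Queue(), multiprocessing.Queue()
    p = multiprocessing.Process(target=toy_worker, args=(inq, outq))
    p.start()
    inq.put((0, {'experiment id': 0, 'a': 1.0}))
    inq.put(None)                 # one sentinel per worker
    print(outq.get())
    p.join()
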
def envelopes(results, outcomes_to_show=[], group_by=None,
              grouping_specifiers=None, density=None, fill=False,
              legend=True, titles={}, ylabels={}, log=False):
    ''' Make envelope plots. An envelope shows the minimum and maximum
    value over time for a set of runs. It is thus to be used in case of
    time series data. The function will try to find a result labeled
    "TIME". If this is present, these values will be used on the X-axis.
    In case of Vensim models, TIME is present by default.

    Parameters
    ----------
    results : tuple
        return from :meth:`perform_experiments`.
    outcomes_to_show : list of str, optional
        list of outcome of interest you want to plot. If empty, all
        outcomes are plotted. **Note**: just names.
    group_by : str, optional
        name of the column in the cases array to group results by.
        Alternatively, `index` can be used to use indexing arrays as the
        basis for grouping.
    grouping_specifiers : iterable or dict, optional
        set of categories to be used as a basis for grouping by.
        Grouping_specifiers is only meaningful if group_by is provided as
        well. In case of grouping by index, the grouping specifiers should
        be in a dictionary where the key denotes the name of the group.
    density : {None, HIST, KDE, VIOLIN, BOXPLOT}, optional
    fill : bool, optional
    legend : bool, optional
    titles : dict, optional
        a way for controlling whether each of the axes should have a
        title. There are three possibilities. If set to None, no title
        will be shown for any of the axes. If set to an empty dict, the
        default, the title is identical to the name of the outcome of
        interest. If you want to override these default names, provide a
        dict with the outcome of interest as key and the desired title as
        value. This dict need only contain the outcomes for which you
        want to use a different title.
    ylabels : dict, optional
        way for controlling the ylabels. Works identical to titles.
    log : bool, optional
        log scale density plot

    Returns
    -------
    Figure : Figure instance
    axes : dict
        dict with outcome as key, and axes as value. Density axes' are
        indexed by the outcome followed by _density.

    Note
    ----
    the current implementation is limited to seven different categories
    in case of group_by, categories, and/or discretesize. This limit is
    due to the colors specified in COLOR_LIST.

    Examples
    --------
    >>> import util as util
    >>> data = util.load_results(r'1000 flu cases.cPickle')
    >>> envelopes(data, group_by='policy')

    will show an envelope for three different policies, for all the
    outcomes of interest, while

    >>> envelopes(data, group_by='policy',
    ...           grouping_specifiers=['static policy', 'adaptive policy'])

    will only show results for the two specified policies, ignoring any
    results associated with 'no policy'.

    '''
    debug("generating envelopes")
    results = copy.deepcopy(results)

    prepared_data = prepare_data(results, outcomes_to_show, group_by,
                                 grouping_specifiers)
    outcomes, outcomes_to_show, time, grouping_labels = prepared_data

    figure, grid = make_grid(outcomes_to_show, density)

    # do the plotting
    axes_dict = {}
    for i, outcome_to_plot in enumerate(outcomes_to_show):
        ax = figure.add_subplot(grid[i, 0])
        axes_dict[outcome_to_plot] = ax

        ax_d = None
        if density:
            ax_d = figure.add_subplot(grid[i, 1], sharey=ax)
            axes_dict[outcome_to_plot + "_density"] = ax_d

        if group_by:
            group_by_envelopes(outcomes, outcome_to_plot, time, density,
                               ax, ax_d, fill, grouping_labels, log)
        else:
            single_envelope(outcomes, outcome_to_plot, time, density, ax,
                            ax_d, fill, log)

        if ax_d:
            for tl in ax_d.get_yticklabels():
                tl.set_visible(False)

        ax.set_xlabel(TIME_LABEL)
        do_ylabels(ax, ylabels, outcome_to_plot)
        do_titles(ax, titles, outcome_to_plot)

    if legend and group_by:
        gs1 = grid[0, 0]

        for ax in figure.axes:
            gs2 = ax._subplotspec
            if all((gs1._gridspec == gs2._gridspec,
                    gs1.num1 == gs2.num1,
                    gs1.num2 == gs2.num2)):
                break

        if fill:
            make_legend(grouping_labels, ax, alpha=0.3, legend_type=PATCH)
        else:
            make_legend(grouping_labels, ax, legend_type=LINE)

    if TIGHT:
        grid.tight_layout(figure)

    return figure, axes_dict

def pairs_scatter(results, outcomes_to_show=[], group_by=None,
                  grouping_specifiers=None, ylabels={}, legend=True,
                  point_in_time=-1, filter_scalar=True, **kwargs):
    ''' Generate a `R style pairs
    <http://www.stat.psu.edu/~dhunter/R/html/graphics/html/pairs.html>`_
    scatter multiplot. In case of time-series data, the end states are
    used.

    Parameters
    ----------
    results : tuple
        return from perform_experiments.
    outcomes_to_show : list of str, optional
        list of outcome of interest you want to plot.
    group_by : str, optional
        name of the column in the cases array to group results by.
        Alternatively, `index` can be used to use indexing arrays as the
        basis for grouping.
    grouping_specifiers : dict, optional
        dict of categories to be used as a basis for grouping by.
        Grouping_specifiers is only meaningful if group_by is provided as
        well. In case of grouping by index, the grouping specifiers should
        be in a dictionary where the key denotes the name of the group.
    ylabels : dict, optional
        ylabels is a dictionary with the outcome names as keys, the
        specified values will be used as labels for the y axis.
    legend : bool, optional
        if true, and group_by is given, show a legend.
    point_in_time : float, optional
        the point in time at which the scatter is to be made. If None is
        provided (default), the end states are used. point_in_time should
        be a valid value on time
    filter_scalar : bool, optional
        remove the non-time-series outcomes. Defaults to True.

    Returns
    -------
    fig : Figure instance
        the figure instance
    axes : dict
        key is tuple of names of outcomes, value is associated axes
        instance

    .. note:: the current implementation is limited to seven different
              categories in case of column, categories, and/or
              discretesize. This limit is due to the colors specified in
              COLOR_LIST.

    '''
    debug("generating pairwise scatter plot")

    prepared_data = prepare_pairs_data(results, outcomes_to_show, group_by,
                                       grouping_specifiers, point_in_time,
                                       filter_scalar)
    outcomes, outcomes_to_show, grouping_labels = prepared_data

    grid = gridspec.GridSpec(len(outcomes_to_show), len(outcomes_to_show))
    grid.update(wspace=0.1, hspace=0.1)

    # the plotting
    figure = plt.figure()
    axes_dict = {}

    combis = [(field1, field2) for field1 in outcomes_to_show
              for field2 in outcomes_to_show]

    for field1, field2 in combis:
        i = list(outcomes_to_show).index(field1)
        j = list(outcomes_to_show).index(field2)
        ax = figure.add_subplot(grid[i, j])
        axes_dict[(field1, field2)] = ax

        if group_by:
            for x, group in enumerate(grouping_labels):
                y_data = outcomes[group][field1]
                x_data = outcomes[group][field2]

                facecolor = plotting_util.COLOR_LIST[x]
                edgecolor = 'k'
                if i == j:
                    facecolor = 'white'
                    edgecolor = 'white'
                ax.scatter(x_data, y_data, facecolor=facecolor,
                           edgecolor=edgecolor)
        else:
            y_data = outcomes[field1]
            x_data = outcomes[field2]

            facecolor = 'b'
            edgecolor = 'k'
            if i == j:
                facecolor = 'white'
                edgecolor = 'white'
            ax.scatter(x_data, y_data, facecolor=facecolor,
                       edgecolor=edgecolor)

        do_text_ticks_labels(ax, i, j, field1, field2, ylabels,
                             outcomes_to_show)

    if group_by and legend:
        gs1 = grid[0, 0]

        for ax in figure.axes:
            gs2 = ax._subplotspec
            if all((gs1._gridspec == gs2._gridspec,
                    gs1.num1 == gs2.num1,
                    gs1.num2 == gs2.num2)):
                break

        make_legend(grouping_labels, ax, legend_type=SCATTER)

    return figure, axes_dict

def run_model(self, case):
    """ Method for running an instantiated model structure. The provided
    implementation assumes that the keys in the case match the variable
    names in the Vensim model.

    If lookups are to be set, specify their transformation from
    uncertainties to lookup values in the extension of this method, then
    call this one using super with the updated case dict.

    if you want to use cin_files, set the cin_file, or cin_files in the
    extension of this method to `self.cin_file`.

    Parameters
    ----------
    case : dict
        keyword arguments for running the model. The case is a dict with
        the names of the uncertainties as key, and the values to which
        to set these uncertainties.

    .. note:: setting parameters should always be done via run_model.
              The model is reset to its initial values automatically
              after each run.

    """
    if self.cin_file:
        try:
            read_cin_file(self.working_directory + self.cin_file)
        except VensimWarning as w:
            debug(str(w))
        else:
            debug("cin file read successfully")

    for lookup_uncertainty in self._lookup_uncertainties:
        # ask the lookup to transform the retrieved uncertainties to the
        # proper lookup value
        case[lookup_uncertainty.name] = lookup_uncertainty.transform(case)

    for key, value in case.items():
        set_value(key, value)
    debug("model parameters set successfully")

    debug("run simulation, results stored in " +
          self.working_directory + self.result_file)
    try:
        run_simulation(self.working_directory + self.result_file)
    except VensimError:
        raise

    results = {}
    error = False
    for output in self.outcomes:
        debug("getting data for %s" % output.name)
        result = get_data(self.working_directory + self.result_file,
                          output.name)
        debug("successfully retrieved data for %s" % output.name)
        if result is not None:
            if result.shape[0] != self.run_length:
                got = result.shape[0]
                data = np.empty((self.run_length))
                data[:] = np.NAN
                data[0:result.shape[0]] = result
                result = data
                error = True

        if not output.time:
            result = result[-1]
        else:
            result = result[0::self.step]
        results[output.name] = result

    self.output = results
    if error:
        raise CaseError("run not completed, got %s, expected %s" %
                        (got, self.run_length), case)

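# A hedged sketch of extending the Vensim interface above. The class and
# file names (FluModel, flu.vpm, flu.cin) are invented for illustration,
# and ParameterUncertainty is assumed from the uncertainty classes of this
# codebase. Per the docstrings above, the cin file is set in the extension
# of run_model, and files are resolved against the working directory.
class FluModel(VensimModelStructureInterface):
    model_file = r'\flu.vpm'          # placeholder model file

    uncertainties = [ParameterUncertainty((0.1, 0.5), 'infection rate')]
    outcomes = [Outcome('infected fraction', time=True)]

    def run_model(self, case):
        self.cin_file = r'\flu.cin'   # placeholder cin file, optional
        super(FluModel, self).run_model(case)
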
def pairs_lines(results, outcomes_to_show=[], group_by=None,
                grouping_specifiers=None, ylabels={}, legend=True,
                **kwargs):
    ''' Generate a `R style pairs
    <http://www.stat.psu.edu/~dhunter/R/html/graphics/html/pairs.html>`_
    lines multiplot. It shows the behavior of two outcomes over time
    against each other. The origin is denoted with a circle and the end
    is denoted with a '+'.

    Parameters
    ----------
    results : tuple
        return from perform_experiments.
    outcomes_to_show : list of str, optional
        list of outcome of interest you want to plot.
    group_by : str, optional
        name of the column in the cases array to group results by.
        Alternatively, `index` can be used to use indexing arrays as the
        basis for grouping.
    grouping_specifiers : dict, optional
        dict of categories to be used as a basis for grouping by.
        Grouping_specifiers is only meaningful if group_by is provided as
        well. In case of grouping by index, the grouping specifiers should
        be in a dictionary where the key denotes the name of the group.
    ylabels : dict, optional
        ylabels is a dictionary with the outcome names as keys, the
        specified values will be used as labels for the y axis.
    legend : bool, optional
        if true, and group_by is given, show a legend.

    Returns
    -------
    fig
        the figure instance
    dict
        key is tuple of names of outcomes, value is associated axes
        instance

    '''
    # unravel return from run_experiments
    debug("making a pairs lines plot")

    prepared_data = prepare_pairs_data(results, outcomes_to_show, group_by,
                                       grouping_specifiers, None)
    outcomes, outcomes_to_show, grouping_labels = prepared_data

    grid = gridspec.GridSpec(len(outcomes_to_show), len(outcomes_to_show))
    grid.update(wspace=0.1, hspace=0.1)

    # the plotting
    figure = plt.figure()
    axes_dict = {}

    combis = [(field1, field2) for field1 in outcomes_to_show
              for field2 in outcomes_to_show]

    for field1, field2 in combis:
        i = list(outcomes_to_show).index(field1)
        j = list(outcomes_to_show).index(field2)
        ax = figure.add_subplot(grid[i, j])
        axes_dict[(field1, field2)] = ax

        if group_by:
            for x, entry in enumerate(grouping_labels):
                data1 = outcomes[entry][field1]
                data2 = outcomes[entry][field2]
                color = plotting_util.COLOR_LIST[x]
                if i == j:
                    color = 'white'
                simple_pairs_lines(ax, data1, data2, color)
        else:
            data1 = outcomes[field1]
            data2 = outcomes[field2]
            color = 'b'
            if i == j:
                color = 'white'
            simple_pairs_lines(ax, data1, data2, color)
        do_text_ticks_labels(ax, i, j, field1, field2, ylabels,
                             outcomes_to_show)

    if group_by and legend:
        gs1 = grid[0, 0]

        for ax in figure.axes:
            gs2 = ax._subplotspec
            if all((gs1._gridspec == gs2._gridspec,
                    gs1.num1 == gs2.num1,
                    gs1.num2 == gs2.num2)):
                break

        make_legend(grouping_labels, ax, legend_type=LINE)

    return figure, axes_dict

def lines(results, outcomes_to_show=[], group_by=None,
          grouping_specifiers=None, density='', legend=True, titles={},
          ylabels={}, experiments_to_show=None, show_envelope=False,
          log=False):
    '''This function takes the results from :meth:`perform_experiments`
    and visualizes these as line plots. It is thus to be used in case of
    time series data. The function will try to find a result labeled
    "TIME". If this is present, these values will be used on the X-axis.
    In case of Vensim models, TIME is present by default.

    Parameters
    ----------
    results : tuple
        return from :meth:`perform_experiments`.
    outcomes_to_show : list of str, optional
        list of outcome of interest you want to plot. If empty, all
        outcomes are plotted. **Note**: just names.
    group_by : str, optional
        name of the column in the cases array to group results by.
        Alternatively, `index` can be used to use indexing arrays as the
        basis for grouping.
    grouping_specifiers : iterable or dict, optional
        set of categories to be used as a basis for grouping by.
        Grouping_specifiers is only meaningful if group_by is provided as
        well. In case of grouping by index, the grouping specifiers should
        be in a dictionary where the key denotes the name of the group.
    density : {None, HIST, KDE, VIOLIN, BOXPLOT}, optional
    legend : bool, optional
    titles : dict, optional
        a way for controlling whether each of the axes should have a
        title. There are three possibilities. If set to None, no title
        will be shown for any of the axes. If set to an empty dict, the
        default, the title is identical to the name of the outcome of
        interest. If you want to override these default names, provide a
        dict with the outcome of interest as key and the desired title as
        value. This dict need only contain the outcomes for which you
        want to use a different title.
    ylabels : dict, optional
        way for controlling the ylabels. Works identical to titles.
    experiments_to_show : ndarray, optional
        indices of experiments to show lines for, defaults to None.
    show_envelope : bool, optional
        show envelope of outcomes. This envelope is based on the minimum
        at each column and the maximum at each column.
    log : bool, optional
        log scale density plot

    Returns
    -------
    fig : Figure instance
    axes : dict
        dict with outcome as key, and axes as value. Density axes' are
        indexed by the outcome followed by _density.

    Note
    ----
    the current implementation is limited to seven different categories
    in case of group_by, categories, and/or discretesize. This limit is
    due to the colors specified in COLOR_LIST.

    '''
    debug("generating line graph")

    # make sure we have the data
    results = copy.deepcopy(results)

    if show_envelope:
        return plot_lines_with_envelopes(
            results, outcomes_to_show=outcomes_to_show, group_by=group_by,
            legend=legend, density=density,
            grouping_specifiers=grouping_specifiers,
            experiments_to_show=experiments_to_show, titles=titles,
            ylabels=ylabels, log=log)

    if experiments_to_show is not None:
        experiments, outcomes = results
        experiments = experiments[experiments_to_show]
        new_outcomes = {}
        for key, value in outcomes.items():
            new_outcomes[key] = value[experiments_to_show]
        results = experiments, new_outcomes

    data = prepare_data(results, outcomes_to_show, group_by,
                        grouping_specifiers)
    outcomes, outcomes_to_show, time, grouping_labels = data

    figure, grid = make_grid(outcomes_to_show, density)
    axes_dict = {}

    # do the plotting
    for i, outcome_to_plot in enumerate(outcomes_to_show):
        ax = figure.add_subplot(grid[i, 0])
        axes_dict[outcome_to_plot] = ax

        ax_d = None
        if density:
            ax_d = figure.add_subplot(grid[i, 1], sharey=ax)
            axes_dict[outcome_to_plot + "_density"] = ax_d
            for tl in ax_d.get_yticklabels():
                tl.set_visible(False)

        if group_by:
            group_by_lines(outcomes, outcome_to_plot, time, density,
                           ax, ax_d, grouping_labels, log)
        else:
            simple_lines(outcomes, outcome_to_plot, time, density, ax,
                         ax_d, log)
        ax.set_xlabel(TIME_LABEL)
        do_ylabels(ax, ylabels, outcome_to_plot)
        do_titles(ax, titles, outcome_to_plot)

    if legend and group_by:
        gs1 = grid[0, 0]

        for ax in figure.axes:
            gs2 = ax._subplotspec
            if all((gs1._gridspec == gs2._gridspec,
                    gs1.num1 == gs2.num1,
                    gs1.num2 == gs2.num2)):
                break

        make_legend(grouping_labels, ax)

    if TIGHT:
        grid.tight_layout(figure)

    return figure, axes_dict

def pairs_density(results, outcomes_to_show=[], group_by=None,
                  grouping_specifiers=None, ylabels={}, point_in_time=-1,
                  log=True, gridsize=50, colormap='coolwarm',
                  filter_scalar=True):
    ''' Generate a `R style pairs
    <http://www.stat.psu.edu/~dhunter/R/html/graphics/html/pairs.html>`_
    hexbin density multiplot. In case of time-series data, the end states
    are used.

    hexbin makes a hexagonal binning plot of x versus y, where x, y are
    1-D sequences of the same length, N. If C is None (the default), this
    is a histogram of the number of occurrences of the observations at
    (x[i], y[i]). For further detail see `matplotlib on hexbin
    <http://matplotlib.sourceforge.net/api/pyplot_api.html#matplotlib.pyplot.hexbin>`_

    Parameters
    ----------
    results : tuple
        return from perform_experiments.
    outcomes_to_show : list of str, optional
        list of outcome of interest you want to plot.
    group_by : str, optional
        name of the column in the cases array to group results by.
        Alternatively, `index` can be used to use indexing arrays as the
        basis for grouping.
    grouping_specifiers : dict, optional
        dict of categories to be used as a basis for grouping by.
        Grouping_specifiers is only meaningful if group_by is provided as
        well. In case of grouping by index, the grouping specifiers should
        be in a dictionary where the key denotes the name of the group.
    ylabels : dict, optional
        ylabels is a dictionary with the outcome names as keys, the
        specified values will be used as labels for the y axis.
    point_in_time : float, optional
        the point in time at which the scatter is to be made. If None is
        provided (default), the end states are used. point_in_time should
        be a valid value on time
    log : bool, optional
        indicating whether density should be log scaled. Defaults to
        True.
    gridsize : int, optional
        controls the gridsize for the hexagonal binning. (default = 50)
    colormap : str
        color map that is to be used in generating the hexbin. For
        details on the available maps, see `pylab
        <http://matplotlib.sourceforge.net/examples/pylab_examples/show_colormaps.html#pylab-examples-show-colormaps>`_.
        (Defaults = coolwarm)
    filter_scalar : bool, optional
        remove the non-time-series outcomes. Defaults to True.

    Returns
    -------
    fig
        the figure instance
    dict
        key is tuple of names of outcomes, value is associated axes
        instance

    '''
    debug("generating pairwise density plot")

    prepared_data = prepare_pairs_data(results, outcomes_to_show, group_by,
                                       grouping_specifiers, point_in_time,
                                       filter_scalar)
    outcomes, outcomes_to_show, grouping_labels = prepared_data

    if group_by:
        # figure out the extents for each combination
        extents = determine_extents(outcomes, outcomes_to_show)

        axes_dicts = {}
        figures = []
        for key, value in outcomes.items():
            figure, axes_dict = simple_pairs_density(value,
                                                     outcomes_to_show,
                                                     log, colormap,
                                                     gridsize, ylabels,
                                                     extents=extents,
                                                     title=key)
            axes_dicts[key] = axes_dict
            figures.append(figure)

        # harmonize the color scaling across figures
        combis = [(field1, field2) for field1 in outcomes_to_show
                  for field2 in outcomes_to_show]
        for combi in combis:
            vmax = -1
            for entry in axes_dicts.values():
                vmax = max(entry[combi].collections[0].norm.vmax, vmax)
            for entry in axes_dicts.values():
                ax = entry[combi]
                ax.collections[0].set_clim(vmin=0, vmax=vmax)
            del vmax

        return figures, axes_dicts
    else:
        return simple_pairs_density(outcomes, outcomes_to_show, log,
                                    colormap, gridsize, ylabels)

class AbstractCallback(object):
    '''
    Abstract base class from which different callback classes can be
    derived. Callback is responsible for storing the results of the runs.

    Parameters
    ----------
    uncs : list
        a list of the uncertainties over which the experiments are being
        run.
    outcomes : list
        a list of outcomes
    nr_experiments : int
        the total number of experiments to be executed
    reporting_interval : int, optional
        the interval at which to provide progress information via
        logging.

    Attributes
    ----------
    i : int
        a counter that keeps track of how many experiments have been
        saved
    reporting_interval : int
        the frequency at which to log progress

    '''
    __metaclass__ = abc.ABCMeta

    i = 0
    reporting_interval = 100

    def __init__(self, uncertainties, outcomes, nr_experiments,
                 reporting_interval=100):
        self.reporting_interval = reporting_interval

    @abc.abstractmethod
    def __call__(self, case_id, case, policy, name, result):
        '''
        Method responsible for storing results. The implementation in
        this class only keeps track of how many runs have been completed
        and logs this. Any extension of AbstractCallback needs to
        implement this method. If one wants to use the logging provided
        here, call it via super.

        Parameters
        ----------
        case_id : int
            the job id
        case : dict
            the case to be stored
        policy : str
            the name of the policy being used
        name : str
            the name of the model being used
        result : dict
            the result dict

        '''
        self.i += 1
        ema_logging.debug(str(self.i) + " cases completed")

        if self.i % self.reporting_interval == 0:
            ema_logging.info(str(self.i) + " cases completed")

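# A hedged sketch of a concrete callback built on AbstractCallback above:
# it stores each result dict in a plain dictionary keyed by case_id and
# reuses the progress logging via super. DictCallback is an invented name,
# not part of the original module.
class DictCallback(AbstractCallback):

    def __init__(self, uncertainties, outcomes, nr_experiments,
                 reporting_interval=100):
        super(DictCallback, self).__init__(uncertainties, outcomes,
                                           nr_experiments,
                                           reporting_interval)
        self.results = {}

    def __call__(self, case_id, case, policy, name, result):
        # delegate progress counting/logging to the base class
        super(DictCallback, self).__call__(case_id, case, policy, name,
                                           result)
        self.results[case_id] = result
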
def run(self):
    self._setupLogger()
    p = multiprocessing.current_process()
    ema_logging.debug('process %s with pid %s started' % (p.name, p.pid))

    # call the run of the super, which in turn will call the worker
    # function
    super(LoggingProcess, self).run()

def run_experiment(self, experiment):
    '''The logic for running a single experiment. This code makes sure
    that model(s) are initialized correctly.

    Parameters
    ----------
    experiment : dict

    Returns
    -------
    experiment_id : int
    case : dict
    policy : str
    model_name : str
    result : dict

    Raises
    ------
    EMAError
        if the model instance raises an EMA error, these are reraised.
    Exception
        Catch all for all other exceptions being raised by the model.
        These are reraised.

    '''
    policy = experiment.pop('policy')
    model_name = experiment.pop('model')
    experiment_id = experiment.pop('experiment id')
    policy_name = policy['name']

    ema_logging.debug("running policy {} for experiment {}".format(
        policy_name, experiment_id))

    # check whether we already initialized the model for this
    # policy
    if (policy_name, model_name) not in self.msi_initialization:
        try:
            ema_logging.debug("invoking model init")
            msi = self.msis[model_name]
            msi.model_init(copy.deepcopy(policy),
                           copy.deepcopy(self.model_kwargs))
        except EMAError as inst:
            ema_logging.exception(inst)
            self.cleanup()
            raise inst
        except Exception as inst:
            ema_logging.exception(
                "some exception occurred when invoking the init")
            self.cleanup()
            raise inst

        ema_logging.debug("initialized model %s with policy %s" %
                          (model_name, policy_name))
        self.msi_initialization = {(policy_name, model_name):
                                   self.msis[model_name]}
    msi = self.msis[model_name]

    case = copy.deepcopy(experiment)
    try:
        ema_logging.debug("trying to run model")
        msi.run_model(case)
    except CaseError as e:
        ema_logging.warning(str(e))

    ema_logging.debug("trying to retrieve output")
    result = msi.retrieve_output()

    ema_logging.debug("trying to reset model")
    msi.reset_model()

    return experiment_id, case, policy, model_name, result

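# A hedged sketch of driving run_experiment directly. ExperimentRunner and
# the experiment layout (policy/model/experiment id keys) come from the
# code in this module; model_interfaces stands for a list of instantiated
# model structure interfaces, as in worker() above.
msis = {msi.name: msi for msi in model_interfaces}
runner = ExperimentRunner(msis, model_kwargs=None)

experiment = {'some uncertainty': 1.0,        # placeholder model input
              'policy': {'name': 'no policy'},
              'model': 'model 1',
              'experiment id': 0}
experiment_id, case, policy, model_name, result = runner.run_experiment(
    experiment)
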
def _handle_tasks(taskqueue, put, outqueue, pool):
    thread = threading.current_thread()

    for task in iter(taskqueue.get, None):
        if thread._state:
            ema_logging.debug('task handler found thread._state != RUN')
            break
        try:
            put(task)
        except IOError:
            ema_logging.debug('could not put task on queue')
            break
    else:
        ema_logging.debug('task handler got sentinel')

    try:
        # tell result handler to finish when cache is empty
        ema_logging.debug('task handler sending sentinel to result handler')
        outqueue.put(None)

        # tell workers there is no more work
        ema_logging.debug('task handler sending sentinel to workers')
        for _ in range(2*len(pool)):
            put(None)
    except IOError:
        ema_logging.debug('task handler got IOError when sending sentinels')

    ema_logging.debug('task handler exiting')
def _handle_results(outqueue, get, cache, log_queue):
    thread = threading.current_thread()

    while True:
        try:
            task = get()
        except (IOError, EOFError):
            ema_logging.debug('result handler got EOFError/IOError -- exiting')
            return

        if thread._state:
            assert thread._state == pool.TERMINATE
            ema_logging.debug('result handler found thread._state=TERMINATE')
            break

        if task is None:
            ema_logging.debug('result handler got sentinel')
            break

        job, experiment = task
        try:
            cache[job]._set(experiment)
        except KeyError:
            pass

    # drain any remaining results while termination is pending
    while cache and thread._state != pool.TERMINATE:
        try:
            task = get()
        except (IOError, EOFError):
            ema_logging.debug('result handler got EOFError/IOError -- exiting')
            return

        if task is None:
            ema_logging.debug('result handler ignoring extra sentinel')
            continue

        job, obj = task
        try:
            cache[job]._set(obj)
        except KeyError:
            pass

    if hasattr(outqueue, '_reader'):
        ema_logging.debug('ensuring that outqueue is not full')
        # If we don't make room available in outqueue then
        # attempts to add the sentinel (None) to outqueue may
        # block. There is guaranteed to be no more than 2 sentinels.
        try:
            for _ in range(10):
                if not outqueue._reader.poll():
                    break
                get()
        except (IOError, EOFError):
            pass

    ema_logging.debug('result handler exiting: len(cache)=%s, thread._state=%s',
                      len(cache), thread._state)
    log_queue.put(None)
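# Both handlers rely on the same convention: a None on a queue is a
# sentinel meaning "no more items". A self-contained sketch of that
# shutdown pattern, independent of the pool machinery above.
import queue
import threading

def consumer(q):
    while True:
        item = q.get()
        if item is None:  # sentinel: no more work
            break
        print('processing', item)

q = queue.Queue()
t = threading.Thread(target=consumer, args=(q,))
t.start()
for i in range(3):
    q.put(i)
q.put(None)  # the sentinel tells the consumer to exit
t.join()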
def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool,
                    task_handler, result_handler, cache, working_dirs):
    ema_logging.info("terminating pool")

    # this is guaranteed to only be called once
    ema_logging.debug('finalizing pool')

    # the `pool` argument shadows the multiprocessing.pool module here,
    # so redefine the TERMINATE state (== pool.TERMINATE) locally
    TERMINATE = 2

    task_handler._state = TERMINATE
    for _ in pool:
        taskqueue.put(None)  # sentinel
    time.sleep(1)

    ema_logging.debug('helping task handler/workers to finish')
    cls._help_stuff_finish(inqueue, task_handler, len(pool))

    assert result_handler.is_alive() or len(cache) == 0

    result_handler._state = TERMINATE
    outqueue.put(None)  # sentinel

    if pool and hasattr(pool[0], 'terminate'):
        ema_logging.debug('terminating workers')
        for p in pool:
            p.terminate()

    ema_logging.debug('joining task handler')
    task_handler.join(1e100)

    ema_logging.debug('joining result handler')
    result_handler.join(1e100)

    if pool and hasattr(pool[0], 'terminate'):
        ema_logging.debug('joining pool workers')
        for p in pool:
            p.join()

    # cleaning up the per-worker working directories
    # TODO investigate whether the multiprocessing.util tempdirectory
    # functionality can be used instead
    for directory in working_dirs:
        ema_logging.debug("deleting " + str(directory))
        shutil.rmtree(directory)
def pairs_lines(results, outcomes_to_show=[], group_by=None,
                grouping_specifiers=None, ylabels={}, legend=True,
                **kwargs):
    ''' Generate a `R style pairs <http://www.stat.psu.edu/~dhunter/R/html/graphics/html/pairs.html>`_
    lines multiplot. It shows the behavior of two outcomes over time
    against each other. The origin is denoted with a circle and the end
    is denoted with a '+'.

    Parameters
    ----------
    results : tuple
        return from perform_experiments.
    outcomes_to_show : list of str, optional
        list of outcomes of interest you want to plot.
    group_by : str, optional
        name of the column in the cases array to group results by.
        Alternatively, `index` can be used to use indexing arrays as the
        basis for grouping.
    grouping_specifiers : dict, optional
        dict of categories to be used as a basis for grouping by.
        grouping_specifiers is only meaningful if group_by is provided as
        well. In case of grouping by index, the grouping specifiers should
        be in a dictionary where the key denotes the name of the group.
    ylabels : dict, optional
        ylabels is a dictionary with the outcome names as keys, the
        specified values will be used as labels for the y axis.
    legend : bool, optional
        if True, and group_by is given, show a legend.

    Returns
    -------
    fig
        the figure instance
    dict
        key is tuple of names of outcomes, value is associated axes
        instance

    '''
    debug("making a pairs lines plot")

    # prepare the data; lines always show the full time series, so no
    # point_in_time is passed
    prepared_data = prepare_pairs_data(results, outcomes_to_show, group_by,
                                       grouping_specifiers, None)
    outcomes, outcomes_to_show, grouping_labels = prepared_data

    grid = gridspec.GridSpec(len(outcomes_to_show), len(outcomes_to_show))
    grid.update(wspace=0.1, hspace=0.1)

    # the plotting
    figure = plt.figure()
    axes_dict = {}

    combis = [(field1, field2) for field1 in outcomes_to_show
              for field2 in outcomes_to_show]

    for field1, field2 in combis:
        i = list(outcomes_to_show).index(field1)
        j = list(outcomes_to_show).index(field2)
        ax = figure.add_subplot(grid[i, j])
        axes_dict[(field1, field2)] = ax

        if group_by:
            for x, entry in enumerate(grouping_labels):
                data1 = outcomes[entry][field1]
                data2 = outcomes[entry][field2]
                color = plotting_util.COLOR_LIST[x]
                if i == j:
                    color = 'white'
                simple_pairs_lines(ax, data1, data2, color)
        else:
            data1 = outcomes[field1]
            data2 = outcomes[field2]
            color = 'b'
            if i == j:
                color = 'white'
            simple_pairs_lines(ax, data1, data2, color)
        do_text_ticks_labels(ax, i, j, field1, field2, ylabels,
                             outcomes_to_show)

    if group_by and legend:
        gs1 = grid[0, 0]

        for ax in figure.axes:
            gs2 = ax._subplotspec
            if all((gs1._gridspec == gs2._gridspec,
                    gs1.num1 == gs2.num1,
                    gs1.num2 == gs2.num2)):
                break
        make_legend(grouping_labels, ax, legend_type=LINE)

    return figure, axes_dict
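# Because the returned dict is keyed by (row outcome, column outcome)
# tuples, individual panels can be restyled after the call. A hedged
# sketch; 'prey' and 'predator' are hypothetical outcome names and
# `results` is assumed to come from perform_experiments.
#
# fig, axes = pairs_lines(results, outcomes_to_show=['prey', 'predator'])
# ax = axes[('prey', 'predator')]
# ax.set_facecolor('0.95')  # highlight one panel
# fig.savefig('pairs_lines.png', dpi=150)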
def __init__(self, msis, processes=None, kwargs=None):
    '''
    Parameters
    ----------
    msis : list
           iterable of model structure interface instances
    processes : int, optional
                nr. of processes to spawn; if None, it is set to the nr.
                of available cores
    kwargs : dict
             kwargs to be passed to :meth:`model_init`
    '''

    if processes is None:
        try:
            processes = multiprocessing.cpu_count()
        except NotImplementedError:
            processes = 1
    ema_logging.info("nr of processes is " + str(processes))

    # setup queues etc.
    self._setup_queues()
    self._taskqueue = queue.Queue(processes*2)
    self._cache = {}
    self._state = pool.RUN

    # handling of logging
    self.log_queue = multiprocessing.Queue()
    h = ema_logging.NullHandler()
    logging.getLogger(ema_logging.LOGGER_NAME).addHandler(h)

    log_queue_reader = LogQueueReader(self.log_queue)
    log_queue_reader.start()

    # setup of the actual pool
    self._pool = []
    working_dirs = []

    ema_logging.debug('generating workers')

    worker_root = None
    for i in range(processes):
        ema_logging.debug('generating worker ' + str(i))

        workername = self._get_worker_name(i)

        # set up per-worker working directories for parallel execution
        for msi in msis:
            if msi.working_directory is not None:
                if worker_root is None:
                    # establish the root once, based on this msi's
                    # working directory
                    wd = msi.working_directory
                    abs_wd = os.path.abspath(wd)
                    worker_root = os.path.dirname(abs_wd)

                wd_name = workername + msi.name
                working_directory = os.path.join(worker_root, wd_name)
                working_dirs.append(working_directory)
                shutil.copytree(msi.working_directory, working_directory)
                msi.set_working_directory(working_directory)

        w = LoggingProcess(self.log_queue,
                           level=logging.getLogger(ema_logging.LOGGER_NAME)
                                        .getEffectiveLevel(),
                           target=worker,
                           args=(self._inqueue, self._outqueue, msis, kwargs))
        self._pool.append(w)

        w.name = w.name.replace('Process', workername)
        w.daemon = True
        w.start()
        ema_logging.debug(' worker ' + str(i) + ' generated')

    # thread for handling tasks
    self._task_handler = threading.Thread(target=CalculatorPool._handle_tasks,
                                          name='task handler',
                                          args=(self._taskqueue,
                                                self._quick_put,
                                                self._outqueue,
                                                self._pool))
    self._task_handler.daemon = True
    self._task_handler._state = pool.RUN
    self._task_handler.start()

    # thread for handling results
    self._result_handler = threading.Thread(target=CalculatorPool._handle_results,
                                            name='result handler',
                                            args=(self._outqueue,
                                                  self._quick_get,
                                                  self._cache,
                                                  self.log_queue))
    self._result_handler.daemon = True
    self._result_handler._state = pool.RUN
    self._result_handler.start()

    # function for cleaning up when finalizing object
    self._terminate = Finalize(self,
                               self._terminate_pool,
                               args=(self._taskqueue,
                                     self._inqueue,
                                     self._outqueue,
                                     self._pool,
                                     self._task_handler,
                                     self._result_handler,
                                     self._cache,
                                     working_dirs),
                               exitpriority=15)

    ema_logging.info("pool has been set up")
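# A self-contained sketch of the per-worker working-directory scheme used
# above: each worker gets its own copy of the model directory so parallel
# runs do not collide on files. The helper name is illustrative, not the
# workbench API.
import os
import shutil

def make_worker_dir(src_dir, worker_name):
    '''copy src_dir to a sibling directory unique to worker_name and
    return the new path (illustrative helper)'''
    root = os.path.dirname(os.path.abspath(src_dir))
    dst = os.path.join(root, worker_name + os.path.basename(src_dir))
    shutil.copytree(src_dir, dst)  # raises if dst already exists
    return dst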