Пример #1
0
def plot_violinplot(ax,data, log, group_labels=None):
    '''
    Create violin plots on an axis, one violin per group in data.
    
    For every non-empty group a gaussian kernel density estimate is 
    evaluated on 250 points between the minimum and the maximum of the 
    group. The density is scaled to a maximum half width of 0.4 and drawn 
    symmetrically around the position of the group. The 25th and 75th 
    percentile are marked with a dotted line, the median with a dashed line.
    
    :param ax: the ax on which to plot. It has to offer fill_betweenx, plot, 
               and set_xticklabels (presumably a matplotlib axes).
    :param data: sequence of groups of values, group i is drawn at 
                 x position i. Empty groups are skipped.
    :param log: not supported for violin plots; if true, a warning is 
                logged and the option is ignored.
    :param group_labels: optional sequence of labels, one for each group.
    
    '''
    
    if log:
        warning("log option ignored for violin plot")
    
    # a separate name for the loop variable, so the data argument is not 
    # rebound while iterating over it
    for p, group in enumerate(data):
        if len(group) == 0:
            continue
        
        kde = gaussian_kde(group) #calculates the kernel density
        
        # support for violin; the number of points has to be an integer,
        # recent numpy versions reject a float here
        x = np.linspace(np.min(group), np.max(group), 250)
        v = kde.evaluate(x) #violin profile (density curve)
        
        scl = 1 / (v.max() / 0.4)
        v = v*scl #scaling the violin to the available space
        
        # cycle through the colors, so having more groups than colors
        # does not raise an IndexError
        color = COLOR_LIST[p % len(COLOR_LIST)]
        ax.fill_betweenx(x,p-v,p+v,facecolor=color,alpha=0.6, lw=1.5)
        
        # np.ravel also accepts plain sequences, group.ravel() does not
        flat = np.ravel(group)
        for percentile in [25, 75]:
            quant = scoreatpercentile(flat, percentile)
            q_x = kde.evaluate(quant) * scl 
            q_x = [p - q_x, p + q_x]
            ax.plot(q_x, [quant, quant], linestyle=":", c='k')
        
        med = np.median(group)
        m_x = kde.evaluate(med) * scl 
        m_x = [p - m_x, p + m_x]
        ax.plot(m_x, [med, med], linestyle="--", c='k', lw=1.5)
        
    if group_labels:
        # an empty label is put in front of the group labels, as in the 
        # original implementation
        labels = [''] + list(group_labels)
        ax.set_xticklabels(labels, rotation='vertical')
def _make_box(x):
    '''
    Build the box that spans all the data in x.
    
    Parameters
    ----------
    x : structured numpy array
    
    Returns
    -------
    structured numpy array with two rows and the same dtype as x. For 
    fields that are not of dtype object, row 0 contains the minimum and 
    row 1 the maximum of the field. For object fields, both rows are 
    assigned the set of values found in the field (np.ma.masked excluded).
    
    Raises
    ------
    TypeError if an object field contains unhashable values
    
    '''
    
    limits = np.zeros((2, ), x.dtype)
    
    for field in recfunctions.get_names(x.dtype):
        field_type = x.dtype.fields.get(field)[0] 
        column = x[field]
        
        if field_type != 'object':
            # ordered field, the box runs from the minimum to the maximum
            limits[field][0] = np.min(column, axis=0) 
            limits[field][1] = np.max(column, axis=0)
            continue
        
        try:
            categories = set(column) - set([np.ma.masked])
            limits[field][:] = categories
        except TypeError as e:
            ema_logging.warning("{} has unhashable values".format(field))
            raise e
    return limits
Пример #3
0
 def __store_result(self, result):
     '''
     Write the outcomes of a single experiment to the hdf5 file.
     
     For each name in self.outcomes, the node with that name is looked up 
     in the self.experiments group. If it does not exist yet, a new array 
     with a row for each experiment is created. The value of the outcome is 
     then written to row self.i of the node.
     
     :param result: maps the name of an outcome to its value, which is 
                    either an array or a scalar.
     
     '''
     for outcome in self.outcomes:
         try:
             node = self.h5file.getNode(where=self.experiments,
                                        name=outcome)
         except NoSuchNodeError :
             if not _pythonIdRE.match(outcome):
                 warning( " %r object name is not a valid Python identifier "
                        "it does not match the pattern ``%s``; %s"
                        % (outcome, _pythonIdRE.pattern, warnInfo))
             
             # the number of columns is the first dimension of the outcome
             try:
                 shapeResults = result[outcome].shape
                 if len(shapeResults) >0:
                     ncol = shapeResults[0] 
                 else:
                     ncol= 1
             except AttributeError:
                 #apparently the outcome is not an array but a scalar
                 ncol=1
                 
             # stderr is silenced while the node is created (presumably to 
             # hide warnings of the hdf5 library). The finally makes sure 
             # stderr is restored if the creation fails.
             temp = sys.stderr 
             sys.stderr = NullDevice()
             try:
                 node = self.h5file.createCArray(where=self.experiments,
                                          name=outcome,
                                          atom=tables.Float32Atom(),#@UndefinedVariable
                                          shape=(self.nrOfExperiments, ncol)
                                          )
             finally:
                 sys.stderr = temp   
         
         node[self.i, :] = result[outcome]
Пример #4
0
def determine_kde(data, 
                  size_kde=1000,
                  ymin=None,
                  ymax=None):
    '''
    
    Helper function responsible for performing a KDE    
    
    :param data: the values for which the density is estimated
    :param size_kde: the number of points at which the density is evaluated
    :param ymin: lower bound of the evaluation range. If None, the minimum
                 of data is used.
    :param ymax: upper bound of the evaluation range. If None, the maximum
                 of data is used.
    :returns: a tuple (kde_x, kde_y). kde_y contains size_kde evenly spaced 
              points from ymin to ymax, kde_x the estimated density at 
              these points. If the estimation fails, the exception is 
              logged as a warning and kde_x contains only zeros.
    
    '''
    # explicit test for None, 0 is a legitimate bound
    if ymin is None:
        ymin = np.min(data)
    if ymax is None:
        ymax = np.max(data)
    
    kde_y = np.linspace(ymin, ymax, size_kde)
    
    try:
        kde_x = kde.gaussian_kde(data).evaluate(kde_y)
    except Exception as e:
        # deliberately best effort: a failed estimate results in a flat 
        # density, not in a failed plot
        warning(e)
        kde_x = np.zeros(kde_y.shape)
    
    return kde_x, kde_y
Пример #5
0
 def make_designs_table(self, group, uncs):
     '''
     Create the 'designs' table in the hdf5 file.
     
     The table gets one column for each uncertainty, followed by a 'model' 
     and a 'policy' column. Categorical uncertainties are stored as 
     strings, integer parameter uncertainties as integers, and all other 
     uncertainties as floats. The names of the categorical uncertainties 
     are kept in self.categoricals.
     
     :param group: the group in which the table is created
     :param uncs: the uncertainties, in the order of the columns
     :returns: the table as returned by self.h5file.createTable
     
     '''
     #determine data types of uncertainties
     expDescription = {}
     self.categoricals = []
     
     # pos is the 1-based column position. It is initialized up front, so 
     # the model and policy columns also get a position if uncs is empty.
     pos = 0
     for pos, uncertainty in enumerate(uncs, 1):
         name = uncertainty.name
         
         if isinstance(uncertainty, CategoricalUncertainty):
             dataType = tables.StringCol(16, pos=pos) #@UndefinedVariable
             self.categoricals.append(name)
         elif isinstance(uncertainty, ParameterUncertainty) and\
                       uncertainty.dist==  INTEGER:
             dataType = tables.IntCol(pos=pos) #@UndefinedVariable
         else:
             dataType = tables.FloatCol(pos=pos) #@UndefinedVariable
         expDescription[name] = dataType
         
         if not _pythonIdRE.match(name):
             warning( " %r object name is not a valid Python identifier "
                    "it does not match the pattern ``%s``; %s"
                    % (name, _pythonIdRE.pattern, warnInfo))
         
     expDescription['model'] = tables.StringCol(16, pos=pos+1)#@UndefinedVariable
     expDescription['policy'] =  tables.StringCol(16, pos=pos+2)#@UndefinedVariable
        
     # stderr is silenced while the table is created (presumably to hide 
     # warnings of the hdf5 library). The finally makes sure stderr is 
     # restored if the creation fails.
     temp = sys.stderr 
     sys.stderr = NullDevice()
     try:
         experiments = self.h5file.createTable(group, 
                                            'designs', 
                                            expDescription)
     finally:
         sys.stderr = temp   
     return experiments
Пример #6
0
def determine_kde(data, 
                  size_kde=1000,
                  ymin=None,
                  ymax=None):
    '''
    
    Helper function responsible for performing a KDE    
    
    :param data: the values for which the density is estimated
    :param size_kde: the number of points at which the density is evaluated
    :param ymin: lower bound of the evaluation range. If None, the minimum
                 of data is used.
    :param ymax: upper bound of the evaluation range. If None, the maximum
                 of data is used.
    :returns: a tuple (kde_x, kde_y). kde_y contains size_kde evenly spaced 
              points in descending order, from ymax to ymin. kde_x contains
              the estimated density at these points. If the estimation 
              fails with a LinAlgError, the error is logged as a warning 
              and kde_x contains only zeros.
    
    '''
    # explicit test for None, 0 is a legitimate bound
    if ymin is None:
        ymin = np.min(data)
    if ymax is None:
        ymax = np.max(data)
    
    # reversed, so the points run from ymax down to ymin
    kde_y = np.linspace(ymin, ymax, size_kde)[::-1]
    
    try:
        kde_x = kde.gaussian_kde(data).evaluate(kde_y)
    except np.linalg.LinAlgError as e:
        warning(e)
        kde_x = np.zeros(kde_y.shape)
    
    return kde_x, kde_y
Пример #7
0
def plot_boxplots(ax, values, log, group_labels=None):
    '''
    Helper function for drawing box plots on an ax
    
    :param ax: the ax on which to plot
    :param values: the data, handed to ax.boxplot as is
    :param log: not supported for box plots; if true, a warning is logged 
                and the option is ignored.
    :param group_labels: optional labels for the x axis, drawn vertically
    
    '''
    if log:
        warning("log option ignored for boxplot")
    
    ax.boxplot(values)
    
    if not group_labels:
        return
    ax.set_xticklabels(group_labels, rotation='vertical')
Пример #8
0
 def model_init(self, policy, kwargs):
     '''
     Initializes the model.
     
     The model file is taken from the 'file' entry of the policy. If the 
     policy has no such entry, a warning is logged and self.model_file is 
     left as it is. The initialization of the parent class is invoked in 
     both cases.
     
     :param policy: the policy, a dict like object
     :param kwargs: passed on to the model_init of the parent class
     
     '''
     
     try:
         model_file = policy['file']
     except KeyError:
         ema_logging.warning("key 'file' not found in policy")
     else:
         self.model_file = model_file
     super(FluModel, self).model_init(policy, kwargs)
Пример #9
0
def group_by_envelopes(outcomes,
                       outcome_to_plot,
                       time,
                       density,
                       ax,
                       ax_d,
                       fill,
                       group_labels,
                       **kwargs):
    '''
    
    Helper function, responsible for generating an envelope plot based on
    a grouping. 
    
    :param outcomes: a dictionary which maps each group label to a 
                     dictionary with the outcomes of that group
    :param outcome_to_plot: the specific outcome to plot
    :param time: the name of the time dimension
    :param density: string, either 'hist', 'kde', 'box plot', or 
                    empty/None.    
    :param ax: the ax on which to plot
    :param ax_d: the ax on which to plot the density
    :param fill: boolean, if true, fill the envelope. 
    :param group_labels: order in which groups should be plotted
    :param kwargs: kwargs to be passed on to the helper function for plotting
                   the density.
    
    '''
    
    for index, label in enumerate(group_labels):
        series = outcomes[label][outcome_to_plot]
        try:
            plot_envelope(ax, index, time, series, fill)
        except ValueError:
            warning("value error when plotting for %s" % (label))
            raise
    
        if density=='kde':
            # the density is based on the final column of the outcome
            kde_x, kde_y = determine_kde(series[:,-1])
            plot_kde(ax_d, kde_x, kde_y, index, **kwargs)
    
    if not density:
        return
    
    if density=='hist':
        # final column of the outcome to plot, for each of the groups
        end_states = [outcomes[label][outcome_to_plot][:,-1] 
                      for label in group_labels]
        plot_histogram(ax_d, end_states, **kwargs)
    if density=='box plot':
        end_states = [outcomes[label][outcome_to_plot][:,-1] 
                      for label in group_labels]
        # NOTE(review): group_labels is passed as the third positional 
        # argument, verify this against the signature of plot_boxplots
        plot_boxplots(ax_d, end_states, group_labels, **kwargs)
    
    # give the density ax the same y range as the envelope ax
    density_axis = ax_d.get_yaxis()
    lower = ax.get_yaxis().get_view_interval()[0]
    upper = ax.get_yaxis().get_view_interval()[1]
    density_axis.set_view_interval(lower, upper)
Пример #10
0
def load_model(file):
    '''
    Load a model into vensim.
    
    :param file: the location of the .vpm file to be loaded.
    :exception: raises a :class:`~EMAExceptions.VensimError` if the model 
                cannot be loaded.
    
    .. note: only works for .vpm files
    
    '''
    vensim_command = r"SPECIAL>LOADMODEL|"+file
    
    ema_logging.debug("executing COMMAND: SIMULATE>SPECIAL>LOADMODEL|"+file)
    try:
        command(vensim_command)
    except VensimWarning as vensim_warning:
        # the warning is logged, the caller gets a VensimError
        ema_logging.warning(str(vensim_warning))
        raise VensimError("vensim file not found")
Пример #11
0
def group_by_envelopes(outcomes,
                       outcome_to_plot,
                       time,
                       density,
                       ax,
                       ax_d,
                       fill,
                       group_labels, 
                       log):
    '''
    
    Helper function, responsible for generating an envelope plot based on
    a grouping. 
    
    :param outcomes: a dictionary which maps each group label to a 
                     dictionary with the outcomes of that group
    :param outcome_to_plot: the specific outcome to plot
    :param time: the name of the time dimension
    :param density: the type of density plot, handed to group_density. 
                    If empty/None, no density is plotted.
    :param ax: the ax on which to plot
    :param ax_d: the ax on which to plot the density
    :param fill: boolean, if true, fill the envelope. 
    :param group_labels: order in which groups should be plotted
    :param log: boolean, log scale density plot
    
    '''
    
    for index, label in enumerate(group_labels):
        series = outcomes[label][outcome_to_plot]
        try:
            plot_envelope(ax, index, time, series, fill)
        except ValueError:
            warning("value error when plotting for %s" % (label))
            raise
    
    if not density:
        return
    
    group_density(ax_d, density, outcomes, outcome_to_plot, group_labels, 
                  log)
    
    # give the density ax the same y range as the envelope ax
    density_axis = ax_d.get_yaxis()
    lower = ax.get_yaxis().get_view_interval()[0]
    upper = ax.get_yaxis().get_view_interval()[1]
    density_axis.set_view_interval(lower, upper)
Пример #12
0
def run_simulation(file):
    ''' 
    Convenient function to run a model and store the results of the run in 
    the specified .vdf file. The specified output file will be overwritten 
    by default

    :param file: the location of the outputfile
    :exception: raises a :class:`~EMAExceptions.VensimError` if running 
                the model failed in some way. 
                
    '''

    # pairs of debug message and vensim command, executed in this order
    steps = ((" executing COMMAND: SIMULATE>RUNNAME|"+file+"|O",
              "SIMULATE>RUNNAME|"+file+"|O"),
             (r"MENU>RUN|o", 
              r"MENU>RUN|o"))

    try:
        for message, vensim_command in steps:
            ema_logging.debug(message)
            command(vensim_command)
    except VensimWarning as vensim_warning:
        ema_logging.warning((str(vensim_warning)))
        raise VensimError(str(vensim_warning))
Пример #13
0
def get_data(filename, varname, step=1):
    ''' 
    Retrieves data from simulation runs or imported data sets. 
    
    
    :param filename: the name of the .vdf file that contains the data
    :param varname: the name of the variable to retrieve data on
    :param step: steps used in slicing. Defaults to 1, meaning the full
                 recorded time series is returned.
    :return: an array with the values for varname over the simulation. If 
             vensim raises a warning while retrieving the data, the warning 
             is logged and an empty list is returned.
    
    '''
    
    vval = []
    try:
        # the time values that are also returned are not used here
        vval, _ = vensimDLLwrapper.get_data(filename, varname)    
    except VensimWarning as w:
        ema_logging.warning(str(w))
    
    # the documented step was never applied before. With the default step
    # the retrieved values are returned untouched.
    if step != 1:
        vval = vval[::step]
        
    return vval
Пример #14
0
def do_titles(ax, titles, outcome):
    '''
    Helper function for setting the title on an ax
    
    If titles is not a dict, nothing is done. If titles is an empty dict, 
    or has no entry for the outcome, the name of the outcome is used as 
    title. In the latter case a warning is logged as well.
    
    :param ax: the ax on which to set the title
    :param titles: a dict which maps outcome names to titles
    :param outcome: the outcome plotted in the ax.
    
    
    '''
    
    # isinstance, so subclasses of dict such as OrderedDict are accepted
    if not isinstance(titles, dict):
        return
    
    if not titles:
        ax.set_title(outcome)
        return
    
    try:
        title = titles[outcome]
    except KeyError:
        warning("key error in do_titles, no title provided for `%s`" % (outcome))
        title = outcome
    ax.set_title(title)
Пример #15
0
def do_ylabels(ax, ylabels, outcome):
    '''
    Helper function for setting the y labels on an ax
    
    If ylabels is not a dict, nothing is done. If ylabels is an empty dict, 
    or has no entry for the outcome, the name of the outcome is used as 
    y label. In the latter case a warning is logged as well.
    
    :param ax: the ax on which to set the y label
    :param ylabels: a dict which maps outcome names to y labels
    :param outcome: the outcome plotted in the ax.
    
    
    '''
    
    # isinstance, so subclasses of dict such as OrderedDict are accepted
    if not isinstance(ylabels, dict):
        return
    
    if not ylabels:
        ax.set_ylabel(outcome)
        return
    
    try:
        ylabel = ylabels[outcome]
    except KeyError:
        warning("key error in do_ylabels, no ylabel provided for `%s`" % (outcome))
        ylabel = outcome
    ax.set_ylabel(ylabel)
Пример #16
0
 def model_init(self, policy, kwargs):
     """
     Method to initialize the model. It starts the JVM if it is not running
     yet, looks up the java package of the model, and instantiates the 
     model interface for the working directory. 
     
     The method is called just prior to running the model. Its main use is 
     to initialize aspects of the model that can not be pickled. In this 
     way it is possible to run a model in parallel without having to worry 
     about having only pickleable attributes (for more details read up on 
     the multiprocessing library).
     
     If the package can not be found, this is only logged at debug level 
     and the method returns without a model interface. A TypeError while 
     instantiating the model interface is logged as a warning and raised
     again.
     
     :param policy: not used by this implementation
     :param kwargs: not used by this implementation
     
     """
     
     if not jpype.isJVMStarted():
         # NOTE(review): the class path and the location of the jvm are 
         # hard coded to a specific machine
         classpath = r'-Djava.class.path=C:\workspace\ElectTransEMA\bin;C:\workspace\Repast3.1\bin;C:\workspace\Repast3.1\lib\asm.jar;C:\workspace\Repast3.1\lib\beanbowl.jar;C:\workspace\Repast3.1\lib\colt.jar;C:\workspace\Repast3.1\lib\commons-collections.jar;C:\workspace\Repast3.1\lib\commons-logging.jar;C:\workspace\Repast3.1\lib\geotools_repast.jar;C:\workspace\Repast3.1\lib\ibis.jar;C:\workspace\Repast3.1\lib\jakarta-poi.jar;C:\workspace\Repast3.1\lib\jep-2.24.jar;C:\workspace\Repast3.1\lib\jgap.jar;C:\workspace\Repast3.1\lib\jh.jar;C:\workspace\Repast3.1\lib\jmf.jar;C:\workspace\Repast3.1\lib\jode-1.1.2-pre1.jar;C:\workspace\Repast3.1\lib\log4j-1.2.8.jar;C:\workspace\Repast3.1\lib\joone.jar;C:\workspace\Repast3.1\lib\JTS.jar;C:\workspace\Repast3.1\lib\junit.jar;C:\workspace\Repast3.1\lib\OpenForecast-0.4.0.jar;C:\workspace\Repast3.1\lib\openmap.jar;C:\workspace\Repast3.1\lib\plot.jar;C:\workspace\Repast3.1\lib\ProActive.jar;C:\workspace\Repast3.1\lib\trove.jar;C:\workspace\Repast3.1\lib\violinstrings-1.0.2.jar;C:\workspace\Repast3.1\repast.jar'
         jpype.startJVM(r'C:\Program Files (x86)\Java\jdk1.6.0_22\jre\bin\client\jvm.dll', classpath)
         debug("jvm started")
     
     debug("trying to find package")
     try:
         modelPackage = jpype.JPackage("org").electTransEma
     except TypeError as inst:
         debug("TypeEror " +" " + str(inst))
         return
     except Exception as inst:
         # covers RuntimeError as well, which is logged in the same way
         debug("exception " + repr(type(inst))+" " + str(inst))
         return
     
     debug("modelPackage found")
     self.modelInterfaceClass = modelPackage.ElectTransInterface
     debug("class found")
     
     try:
         # forward slashes in the path that is handed to java
         directory = self.workingDirectory.replace("\\", "/")
         
         self.modelInterface = self.modelInterfaceClass(directory)
         debug("class loaded succesfully")
     except TypeError as inst:
         warning("failure to instantiate the model")
         raise inst
Пример #17
0
def set_value(variable, value):
    '''
    set the value of a variable to value
    
    current implementation only works for lookups and normal values. In case
    of a list, a lookup is assumed, else a normal value is assumed. 
    See the DSS reference supplement, p. 58 for details.

    
    :param variable: name of the variable to set.
    :param value: the value for the variable. 
                  **note**: the value can be either a list, or an float/integer. 
                  If it is a list, it is assumed the variable is a lookup.
                  
    If vensim raises a warning while setting a normal value, it is logged
    that the variable was not found. A warning raised while setting a 
    lookup is not caught.
    '''
    
    # isinstance, so subclasses of list are treated as a lookup as well
    if isinstance(value, list):
        # the list is written without its brackets, the conversion to a 
        # plain list guarantees the default string representation
        lookup = str(list(value))[1:-1]
        command(r"SIMULATE>SETVAL|"+variable+"("+ lookup + ")")
    else:
        try:
            command(r"SIMULATE>SETVAL|"+variable+"="+str(value))
        except VensimWarning:
            ema_logging.warning('variable: \'' +variable+'\' not found')
Пример #18
0
    def run_model(self, case):
        """
        Method for running an instantiated model structure. The values of 
        the case are written to the sheet, and the outcomes are read from 
        the sheet and stored as a dict in self.output. 
        
        This implementation assumes that the names of the uncertainties 
        correspond to the name of the cells in Excel. See e.g. `this site <http://spreadsheets.about.com/od/exceltips/qt/named_range.htm>`_ 
        for details or use Google and search on 'named range'. One of the 
        requirements on the names is that they cannot contain spaces. 

        For the extraction of results, the same approach is used. That is, 
        this implementation assumes that the name of a :class:`~outcomes.Outcome`
        instance corresponds to the name of a cell, or set of cells.
        
        Cells that can not be found are logged as a warning and skipped. 

        :param case:    dictionary with arguments for running the model
        
        """
        #find right sheet
        try:
            sheet = self.wb.Sheets(self.sheet)
        except Exception :
            ema_logging.warning("com error: sheet not found")
            self.cleanup()
            raise
        
        #set values on sheet
        for name, cell_value in case.items():
            try:
                sheet.Range(name).Value = cell_value 
            except com_error:
                ema_logging.warning("com error: no cell(s) named %s found" % name)

        #get results
        results = {}
        for outcome in self.outcomes:
            try:
                raw = sheet.Range(outcome.name).Value
                try:
                    # a set of cells: the first entry of each row is used
                    first_column = [row[0] for row in raw]
                except TypeError:
                    # a single cell, the value is used as is
                    first_column = raw
                results[outcome.name] = np.array(first_column)
            except com_error:
                ema_logging.warning("com error: no cell(s) named %s found" % outcome.name)
        self.output = results
Пример #19
0
    def perform_experiments(self, 
                           cases,
                           callback=DefaultCallback,
                           reporting_interval=100,
                           model_kwargs = {},
                           which_uncertainties=INTERSECTION,
                           which_outcomes=INTERSECTION,
                           **kwargs):
        """
        Method responsible for running the experiments on a structure. In case 
        of multiple model structures, the outcomes are set to the intersection 
        of the sets of outcomes of the various models, unless which_outcomes
        is set to the union.
        
        :param cases: In case of Latin Hypercube sampling and Monte Carlo 
                      sampling, cases specifies the number of cases to
                      generate. In case of Full Factorial sampling,
                      cases specifies the resolution to use for sampling
                      continuous uncertainties. Alternatively, one can supply
                      a list of dicts, where each dicts contains a case.
                      That is, an uncertainty name as key, and its value. 
        :param callback: Class that will be called after finishing a 
                         single experiment,
        :param reporting_interval: parameter for specifying the frequency with
                                   which the callback reports the progress.
                                   (Default is 100) 
        :param model_kwargs: dictionary of keyword arguments to be passed to 
                            model_init
        :param which_uncertainties: keyword argument for controlling whether,
                                    in case of multiple model structure 
                                    interfaces, the intersection or the union
                                    of uncertainties should be used. 
                                    (Default is intersection).  
        :param which_outcomes: keyword argument for controlling whether,
                               in case of multiple model structure 
                               interfaces, the intersection or the union
                               of outcomes should be used. 
                               (Default is intersection).  
        :param kwargs: generic keyword arguments to pass on to callback
         
                       
        :returns: a `structured numpy array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_ 
                  containing the experiments, and a dict with the names of the 
                  outcomes as keys and an numpy array as value.
        :raises EMAError: if cases is neither an int nor a list
        :raises ValueError: if which_outcomes is neither UNION nor 
                            INTERSECTION
                
        .. rubric:: suggested use
        
        In general, analysis scripts require both the structured array of the 
        experiments and the dictionary of arrays containing the results. The 
        recommended use is the following::
        
        >>> results = ensemble.perform_experiments(10000) #recommended use
        >>> experiments, output = ensemble.perform_experiments(10000) 
        
        The latter option will work fine, but most analysis scripts require 
        to wrap it up into a tuple again::
        
        >>> data = (experiments, output)
        
        Another reason for the recommended use is that you can save this tuple
        directly::
        
        >>> import expWorkbench.util as util
        >>> util.save_results(results, filename)
          
        .. note:: The current implementation has a hard coded limit to the 
          number of designs possible. This is set to 50.000 designs. 
          If one want to go beyond this, set `self.max_designs` to
          a higher value.
        
        """
        # NOTE(review): model_kwargs has a mutable default. It is deep copied 
        # before it is handed to model_init, but it is passed as is to 
        # _make_pool.

        # make sure there is at least one policy, the number of policies is 
        # a factor in the number of experiments below
        if not self._policies:
            self._policies.append({"name": "None"})
        
        # identify the uncertainties and sample over them
        if type(cases) ==  types.IntType:
            sampled_unc, unc_dict = self._generate_samples(cases, 
                                                           which_uncertainties)
            nr_of_exp =self.sampler.deterimine_nr_of_designs(sampled_unc)\
                      *len(self._policies)*len(self._msis)
            experiments = self._generate_experiments(sampled_unc)
        elif type(cases) == types.ListType:
            # the names of the uncertainties are taken from the first case
            unc_dict = self.determine_uncertainties()[1]
            unc_names = cases[0].keys()
            # only the keys of this dict are used below
            sampled_unc = {name:[] for name in unc_names}
            nr_of_exp = len(cases)*len(self._policies)*len(self._msis)
            experiments = self._generate_experiments(cases)
        else:
            raise EMAError("unknown type for cases")
        # the uncertainties, sorted by name
        uncertainties = [unc_dict[unc] for unc in sorted(sampled_unc)]

        # identify the outcomes that are to be included
        overview_dict, element_dict = self._determine_unique_attributes("outcomes")
        if which_outcomes==UNION:
            outcomes = element_dict.keys()
        elif which_outcomes==INTERSECTION:
            # the entry that is keyed by the names of all the model 
            # structure interfaces
            outcomes = overview_dict[tuple([msi.name for msi in self._msis])]
            outcomes = [outcome.name for outcome in outcomes]
        else:
            raise ValueError("incomplete value for which_outcomes")
         
        info(str(nr_of_exp) + " experiment will be executed")
                
        #initialize the callback object
        # from here on, callback is the instance and no longer the class
        callback = callback(uncertainties, 
                            outcomes, 
                            nr_of_exp,
                            reporting_interval=reporting_interval,
                            **kwargs)

        if self.parallel:
            info("preparing to perform experiment in parallel")
            
            # the pool is only made if there is none yet
            if not self._pool:
                self._make_pool(model_kwargs)
            info("starting to perform experiments in parallel")

            self._pool.run_experiments(experiments, callback)
        else:
            info("starting to perform experiments sequentially")

            def cleanup(modelInterfaces):
                # invoke cleanup on each of the model structure interfaces
                for msi in modelInterfaces:
                    msi.cleanup()
                    # this only removes the local name
                    del msi

            
            msi_initialization_dict = {}
            # lookup of model structure interfaces by name
            msis = {msi.name: msi for msi in self._msis}
            job_counter = itertools.count()
            
            # remember the working directory, it is restored after the runs
            cwd = os.getcwd() 
            for experiment in experiments:
                # NOTE(review): next() as a method and has_key below are 
                # Python 2 only
                case_id = job_counter.next()
                # policy and model are taken out of the experiment, what 
                # remains is handed to the model as the case
                policy = experiment.pop('policy')
                msi = experiment.pop('model')
                
                # check whether we already initialized the model for this 
                # policy
                if not msi_initialization_dict.has_key((policy['name'], msi)):
                    try:
                        debug("invoking model init")
                        msis[msi].model_init(copy.deepcopy(policy),\
                                             copy.deepcopy(model_kwargs))
                    except (EMAError, NotImplementedError) as inst:
                        exception(inst)
                        cleanup(self._msis)
                        raise
                    except Exception:
                        exception("some exception occurred when invoking the init")
                        cleanup(self._msis)
                        raise 
                    debug("initialized model %s with policy %s" % (msi, policy['name']))
                    #always, only a single initialized msi instance
                    # the dict is replaced and not updated, so a change of 
                    # policy or model results in a new initialization
                    msi_initialization_dict = {(policy['name'], msi):msis[msi]}
                # from here on, msi is the interface and no longer its name
                msi = msis[msi]

                # the model gets a copy, the callback the experiment itself
                case = copy.deepcopy(experiment)
                try:
                    debug("trying to run model")
                    msi.run_model(case)
                except CaseError as e:
                    # a failed case is only logged, the output is still 
                    # retrieved and handed to the callback
                    warning(str(e))
                    
                debug("trying to retrieve output")
                result = msi.retrieve_output()
                msi.reset_model()
                
                # NOTE(review): this message is logged after reset_model
                # has been called
                debug("trying to reset model")
                callback(case_id, experiment, policy, msi.name, result)
                
            # NOTE(review): these two lines are not reached if an exception 
            # is raised in the loop (the model init handlers call cleanup 
            # themselves before they raise)
            cleanup(self._msis)
            os.chdir(cwd)
       
        results = callback.get_results()
        info("experiments finished")
        
        return results
Пример #20
0
    def __init__(self, 
                 uncs, 
                 outcomes, 
                 nrOfExperiments, 
                 reporting_interval=100,
                 fileName=None,
                 experimentName=None):
        '''
        Set up the hdf5 file for storing a new series of experiments. 
        
        A group for the series is made in '/experiments', together with the 
        table for the designs. The class names and svn ids of the ensemble,
        its model interfaces, and its sampler are stored as attributes. The 
        ensemble is retrieved through reflection, from the `self` in the 
        frame of the caller.
        
        :param uncs: the uncertainties, used for the columns of the designs 
                     table
        :param outcomes: the outcomes, their names are kept in self.outcomes
        :param nrOfExperiments: the number of experiments
        :param reporting_interval: passed on to the parent class
        :param fileName: if the filename exist, the file is opened. If the file
                         does not exist, a new file wih the same name is 
                         created. 
        :param experimentName: name of the series of experiments                 
        :exception: raises an EMAError if fileName or experimentName is 
                    not specified.
        
        '''
        if not fileName: 
            raise EMAError("no file name specified for hdf5 file")
        if not experimentName:
            raise EMAError("no experiments name specified ")
        
        super(HDF5Callback, self).__init__(uncs, 
                                              outcomes, 
                                              nrOfExperiments, 
                                              reporting_interval)
        try:
            h5file = tables.openFile(filename=fileName, mode='r+')
        except IOError:
            ema_logging.warning("file %s does not exist, a new file is created" %fileName)
            create_hdf5_ema__project(fileName=fileName)
            h5file = tables.openFile(filename=fileName, mode='r+')
        self.h5file = h5file

        # make a group for the new series of experiments
        # createGroup is expected to raise an error if the group already 
        # exists
        series = self.h5file.createGroup('/experiments', experimentName)
        self.experiments = series
        self.experiments._v_attrs.last_modified = datetime.datetime.now()

        #make table for experiments
        self.designs = self.make_designs_table(self.experiments, uncs)
        
        #some reflection: the ensemble is the `self` of the calling frame
        caller_frame = inspect.stack()[1][0]
        ensemble = caller_frame.f_locals['self']
        sampler = ensemble.sampler
        modelInterfaces = ensemble._modelStructures
      
        self.experiments._v_attrs.ensemble_svnid = format_svn_id(inspect.getmodule(ensemble).SVN_ID)
        
        # the attributes of the model interfaces are numbered from 1
        for nr, mi in enumerate(modelInterfaces, 1):
            self.experiments._v_attrs['mi_%s' %nr] = mi.__class__.__name__
            self.experiments._v_attrs['mi_svnid_%s' %nr] = format_svn_id(inspect.getmodule(mi).SVN_ID)

        self.designs._v_attrs.sampler = sampler.__class__.__name__
        self.designs._v_attrs.sampler_svnid = format_svn_id(inspect.getmodule(sampler).SVN_ID)
           
        self.design = self.designs.row
        
        self.outcomes = [outcome.name for outcome in outcomes]
        self.nrOfExperiments = nrOfExperiments
Пример #21
0
    def perform_experiments(self, 
                           cases,
                           callback=DefaultCallback,
                           reporting_interval=100,
                           modelKwargs=None,
                           **kwargs):
        """
        Method responsible for running the experiments on a structure. In case 
        of multiple model structures, the outcomes are set to the intersection 
        of the sets of outcomes of the various models.         
        
        :param cases: In case of Latin Hypercube sampling and Monte Carlo 
                      sampling, cases specifies the number of cases to
                      generate. In case of Full Factorial sampling,
                      cases specifies the resolution to use for sampling
                      continuous uncertainties. Alternatively, one can supply
                      a list of dicts, where each dicts contains a case.
                      That is, an uncertainty name as key, and its value. 
        :param callback: Class that will be called after finishing a 
                         single experiment. It is instantiated once with the
                         uncertainties, the outcomes, the total number of 
                         experiments, `reporting_interval` and `kwargs`.
        :param reporting_interval: parameter for specifying the frequency with
                                   which the callback reports the progress.
                                   (Default is 100) 
        :param modelKwargs: dictionary of keyword arguments to be passed to 
                            model_init. (Default is None, which is treated 
                            as an empty dict)
        :param kwargs: generic keyword arguments to pass on to callback
         
                       
        :returns: a `structured numpy array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_ 
                  containing the experiments, and a dict with the names of the 
                  outcomes as keys and an numpy array as value.
        :raises EMAError: if `cases` is neither an int nor a list, or if the 
                          model structures have no outcome in common.
                
        .. note:: This method has side effects on the ensemble: if no policies
                  have been set, a placeholder policy ``{"name": "None"}`` is 
                  appended to the policies, and the `outcomes` attribute of
                  every model structure is replaced by the shared outcomes.
                
        .. rubric:: suggested use
        
        In general, analysis scripts require both the structured array of the 
        experiments and the dictionary of arrays containing the results. The 
        recommended use is the following::
        
        >>> results = ensemble.perform_experiments(10000) #recommended use
        >>> experiments, output = ensemble.perform_experiments(10000) #will work fine
        
        The latter option will work fine, but most analysis scripts require 
        to wrap it up into a tuple again::
        
        >>> data = (experiments, output)
        
        Another reason for the recommended use is that you can save this tuple
        directly::
        
        >>> import expWorkbench.util as util
        >>> util.save_results(results, file)
          
        
        
        """
        
        # use a fresh dict per call; a mutable default would be shared between
        # calls and could be modified by model_init
        if modelKwargs is None:
            modelKwargs = {}
        
        if isinstance(cases, int):
            cases, uncertainties = self._generate_cases(cases)
        elif isinstance(cases, list):
            # only keep the shared uncertainties that actually occur in the
            # supplied cases (the first case is taken as representative)
            shared = self.determine_intersecting_uncertainties()[0]
            case_keys = set(cases[0].keys())
            uncertainties = [uncertainty for uncertainty in shared if 
                             uncertainty.name in case_keys]
        else:
            raise EMAError("unknown type for cases")
        
        if not self._policies:
            self._policies.append({"name": "None"})

        nrOfExperiments = (len(cases) * len(self._policies) * 
                           len(self._modelStructures)) 
        info(str(nrOfExperiments) + 
             " experiment will be executed")
        
        #set outcomes to the intersect of outcomes across models
        outcomes_per_model = [msi.outcomes for msi in self._modelStructures]
        # intersect the first model's outcomes with those of all *other* 
        # models (slicing with [:1] would only compare the first with itself)
        outcomes = set(outcomes_per_model[0]).intersection(
                                                    *outcomes_per_model[1:])
        # check before touching the models, so a failure does not leave them
        # with emptied outcome lists
        if not outcomes:
            raise EMAError("no outcomes of interest defined")
        for msi in self._modelStructures:
            msi.outcomes = list(outcomes)
                
        #initialize the callback object
        callback = callback(uncertainties, 
                            outcomes, 
                            nrOfExperiments,
                            reporting_interval=reporting_interval,
                            **kwargs)
                
        if self.parallel:
            info("preparing to perform experiment in parallel")
            
            if not self._pool:
                self.__make_pool(modelKwargs)
            info("starting to perform experiments in parallel")

            results = self._pool.runExperiments(cases, self._policies)
            
            for entry in results:
                try:
                    callback(*entry.get())
                except EMAParallelError as e:
                    # log the failed experiment and continue with the rest;
                    # any other exception propagates to the caller
                    exception(e)
        else:
            info("starting to perform experiments sequentially")

            def cleanup(modelInterfaces):
                for msi in modelInterfaces:
                    msi.cleanup()

            for policy in self._policies:
                for msi in self._modelStructures:
                    # hand the model a copy so it cannot modify the policy
                    # that is reported to the callback
                    policyToRun = copy.deepcopy(policy)
                    try:
                        msi.model_init(policyToRun, modelKwargs)
                    except (EMAError, NotImplementedError) as inst:
                        exception(inst)
                        cleanup(self._modelStructures)
                        raise
    
                    for case in cases:
                        caseToRun = copy.deepcopy(case)
                        try:
                            msi.run_model(caseToRun)
                        except CaseError as e:
                            # a failed case is only logged; its output is 
                            # still retrieved and passed to the callback
                            warning(str(e))
                        result = msi.retrieve_output()
                        msi.reset_model()
                        callback(case, policy, msi.name, 
                                 result
                                 )
            cleanup(self._modelStructures)
        
        results = callback.get_results()
        info("experiments finished")
        
        return results
    
#    def __optimize(self, 
#                  allele_order,
#                  setOfAlleles, 
#                  obj_function,
#                  nrOfGenerations,
#                  nrOfPopMembers,
#                  minimax,
#                  crossoverRate,
#                  mutationRate,
#                  elitism,
#                  reporting_interval,
#                  population=BaseEMAPopulation):
#        # make a genome with a length equal to the list of alleles
#        genome = G1DList.G1DList(len(setOfAlleles))
#        genome.setParams(allele=setOfAlleles)
#        
#        # The evaluator function (objective function)
#        # to be decided what to use as test function. In principle
#        # the test function is a function that transforms the genome
#        # to a case, runs the model, and returns the results
#        # ideally, we might remove that step entirely by not
#        # using ind.evaluate(**args) in the population...
#        genome.evaluator.set(obj_function)
#        genome.crossover.set(Crossovers.G1DListCrossoverSinglePoint)
#        genome.mutator.set(Mutators.G1DListMutatorAllele)
#        genome.initializator.set(Initializators.G1DListInitializatorAllele)
#        
#        stats = StatisticsCallback(nrOfGenerations, nrOfPopMembers)
#        ga = EMAGA(genome, population)
#        ga.internalPop = population(genome, allele_order, self, reporting_interval)
#        ga.setMinimax(Consts.minimaxType[minimax])
#        ga.stepCallback.set(stats)
#        ga.selector.set(EMAoptimization.EMARankSelector)
#        
#        if elitism:
#            ga.setElitism(True)
#            ga.setElitismReplacement(elitism)
#        
#        # a generation contains nrOfPopMembers individuals
#        ga.setPopulationSize(nrOfPopMembers)
#        
#        # there are nrOfGeneration generations
#        ga.setGenerations(nrOfGenerations)
#        
#        # crossover and mutation    
#        ga.setCrossoverRate(crossoverRate)
#        ga.setMutationRate(mutationRate)
#
#        # perform optimization, print every 10 generations
#        # ideally, we intercept these messages and redirect them to
#        # ema_logging.
#        ema_logging.info("starting optimization")
#        ga.evolve()
#        
#        # return results for best fit
#        best_individual = ga.bestIndividual()
#        
#        best_case = {}
#        for i, key in enumerate(allele_order):
#            best_case[key] = best_individual.genomeList[i]
#        
#        c = ""
#        for key, value in best_case.items():
#            c += key
#            c += " : "
#            c += str(value)
#            c += '\n'
#        
#        info('best case:\n' + c )
#        info('raw score: ' + str(best_individual.score))
#        
#        results = {"best individual score": best_individual.score,
#                   "best individual ": best_individual,
#                   "stats": stats.stats,
#                   "raw": stats.rawScore,
#                   "fitness": stats.fitnessScore,
#                   "mutation ration": mutationRate,
#                   "crossover rate": crossoverRate,
#                   "minimax": minimax,
#                   "time elapsed": ga.get_time_elapsed()}
#        
#        return results    
#    
##    def perform_outcome_optimization(self, 
##                                     reporting_interval=100,
##                                     obj_function=None,
##                                     minimax = "maximize",
##                                     nrOfGenerations = 100,
##                                     nrOfPopMembers=100,
##                                     crossoverRate = 0.5, 
##                                     mutationRate = 0.02,
##                                     elitism = 0
##                                     ):
##        """
##        Method responsible for performing the optimization.
##        
##        :param reporting_interval: Parameter for specifying the frequency with
##                           which the callback reports the progress.
##                           (Default = 100) 
##        :param obj_function: The objective function to use. This objective 
##                             function receives the results for a single model
##                             run for all the specified outcomes of interest and
##                             should return a single score which should be 
##                             positive. 
##        :param minimax: String indicating whether to minimize or maximize the
##                        obj_function.
##        :param nrOfGenerations: The number of generations to evolve over.
##        :param nrOfPopulationMembers: The number of population members in a 
##                                      single generation.
##        :param crossoverRate: The crossover rate, between 0.0 and 1.0. 
##                              see `wikipedia <http://en.wikipedia.org/wiki/Crossover_%28genetic_algorithm%29>`__
##                              for details. (Default = 0.5)
##        :param mutationRate: The mutation rate, between 0.0 and 1.0.
##                             see `wikipedia <http://en.wikipedia.org/wiki/Mutation_%28genetic_algorithm%29>`__
##                             for details. (Default = 0.02)
##        :param elitism: The number of best individuals to copy to the next 
##                        generation. (Default = 0)
##        
##        :returns: A dict with info on the optimization including stats, best
##                  individual, and information on the optimization setup
##        
##        """
##
##        # Genome instance
##        setOfAlleles = GAllele.GAlleles()
##
##        allele_order = []
##        # deduce the alleles from the overlapping set of model structure 
##        # uncertainties
##        # the alleles should use the limits of uncertainty, and their dType
##        # in case of categorical uncertainties, the transform to the 
##        # category is delegated to a later stage (to be decided)
##        shared_uncertainties = self.determine_intersecting_uncertainties()[0]
##        for uncertainty in shared_uncertainties:
##            values = uncertainty.get_values()
##            dist = uncertainty.dist
##
##            if isinstance(uncertainty, CategoricalUncertainty):
##                allele = GAllele.GAlleleList(uncertainty.categories)
##            elif dist== INTEGER:
##                allele = GAllele.GAlleleRange(values[0], values[1])
##            else:
##                allele = GAllele.GAlleleRange(values[0], values[1], real=True)
##            
##            setOfAlleles.add(allele)
##            allele_order.append(uncertainty.name)
##        return self.__optimize(allele_order, 
##                               setOfAlleles, obj_function, 
##                              nrOfGenerations, nrOfPopMembers, minimax, 
##                              crossoverRate, mutationRate, elitism,
##                              reporting_interval,
##                              population=OutcomeOptimizationPopulation)
##
##
##    def perform_robust_optimization(self, 
##                                    cases,
##                                    reporting_interval=100,
##                                    obj_function=None,
##                                    policy_levers={},
##                                    minimax="maximize",
##                                    nrOfGenerations=100,
##                                    nrOfPopMembers=100,
##                                    crossoverRate=0.5, 
##                                    mutationRate=0.02,
##                                    elitism=0
##                                    ):
##        """
##        Method responsible for performing robust optimization.
##        
##        :param cases: In case of Latin Hypercube sampling and Monte Carlo 
##                      sampling, cases specifies the number of cases to
##                      generate. In case of Full Factorial sampling,
##                      cases specifies the resolution to use for sampling
##                      continuous uncertainties. Alternatively, one can supply
##                      a list of dicts, where each dicts contains a case.
##                      That is, an uncertainty name as key, and its value.
##        :param reporting_interval: Parameter for specifying the frequency with
##                                   which the callback reports the progress.
##                                   (Default = 100)         
##        :param obj_function: The objective function to use. This objective 
##                             function receives the results for a policy and
##                             the provided cases for all the specified outcomes 
##                             of interest and should return a single score which 
##                             should be positive. 
##        :param policy_levers: A dictionary with model parameter names as key
##                              and a dict as value. The dict should have two 
##                              fields: 'type' and 'values. Type is either
##                              list or range, and determines the appropriate
##                              allele type. Values are the parameters to 
##                              be used for the specific allele. 
##        :param minimax: String indicating whether to minimize or maximize the
##                        obj_function.
##        :param nrOfGenerations: The number of generations to evolve over.
##        :param nrOfPopulationMembers: The number of population members in a 
##                                      single generation.
##        :param crossoverRate: The crossover rate, between 0.0 and 1.0. 
##                              see `wikipedia <http://en.wikipedia.org/wiki/Crossover_%28genetic_algorithm%29>`__
##                              for details. (Default = 0.5)
##        :param mutationRate: The mutation rate, between 0.0 and 1.0.
##                             see `wikipedia <http://en.wikipedia.org/wiki/Mutation_%28genetic_algorithm%29>`__
##                             for details. (Default = 0.02)
##        :param elitism: The number of best individuals to copy to the next 
##                        generation. (Default = 0) 
##        
##        :returns: A dict with info on the optimization including stats, best
##                  individual, and information on the optimization setup
##        
##        """
##
##        # Genome instance
##        setOfAlleles = GAllele.GAlleles()
##        allele_order = []
##        for key, value in policy_levers.items():
##            type_allele = value['type'] 
##            value = value['values']
##            if type_allele=='range':
##                allele = GAllele.GAlleleRange(value[0], value[1], real=True)
##            elif type_allele=='list':
##                allele = GAllele.GAlleleList(value)
##            else:
##                raise EMAError("unknown allele type: possible types are range and list")
##            
##            setOfAlleles.add(allele)
##            allele_order.append(key)
##        
##        RobustOptimizationPopulation.cases = cases
##        return self.__optimize(allele_order, 
##                               setOfAlleles, 
##                               obj_function, 
##                               nrOfGenerations, 
##                               nrOfPopMembers, 
##                               minimax, 
##                               crossoverRate, 
##                               mutationRate,
##                               elitism,
##                               reporting_interval, 
##                               population=RobustOptimizationPopulation)
##    
##    def perform_maximin_optimization(self, 
##                                    reporting_interval=100,
##                                    obj_function1=None,
##                                    policy_levers={},
##                                    minimax1 = "minimize",
##                                    minimax2 = "maximize",                                   
##                                    nrOfGenerations1 = 100,
##                                    nrOfPopMembers1 = 100,
##                                    crossoverRate1 = 0.5, 
##                                    mutationRate1 = 0.02,
##                                    elitism1 = 0,
##                                    nrOfGenerations2 = 100,
##                                    nrOfPopMembers2 = 100,
##                                    crossoverRate2 = 0.5, 
##                                    mutationRate2 = 0.02,
##                                    elitism2 = 0
##                                    ):
##        
##        # Genome instance
##        setOfAlleles = GAllele.GAlleles()
##        allele_order = []
##        for key, value in policy_levers.items():
##            allele = GAllele.GAlleleRange(value[0], value[1], real=True)
##            
##            setOfAlleles.add(allele)
##            allele_order.append(key)
##        
##        MaximinOptimizationPopulation.optimizationType = minimax2
##        MaximinOptimizationPopulation.nrOfGenerations = nrOfGenerations2
##        MaximinOptimizationPopulation.nrOfPopMembers = nrOfPopMembers2
##        MaximinOptimizationPopulation.crossoverRate = crossoverRate2
##        MaximinOptimizationPopulation.mutationRate = mutationRate2
##        MaximinOptimizationPopulation.elitism = elitism2
##        
##        return self.__optimize(allele_order, 
##                               setOfAlleles, 
##                               obj_function1, 
##                               nrOfGenerations1, 
##                               nrOfPopMembers1, 
##                               minimax1, 
##                               crossoverRate1, 
##                               mutationRate1,
##                               elitism1,
##                               reporting_interval, 
##                               population=MaximinOptimizationPopulation)