def perform_experiments(self,
                        cases,
                        callback=DefaultCallback,
                        reporting_interval=100,
                        modelKwargs=None,
                        **kwargs):
    """
    Method responsible for running the experiments on a structure. In case
    of multiple model structures, the outcomes are set to the intersection
    of the sets of outcomes of the various models.

    :param cases: In case of Latin Hypercube sampling and Monte Carlo
                  sampling, cases specifies the number of cases to
                  generate. In case of Full Factorial sampling,
                  cases specifies the resolution to use for sampling
                  continuous uncertainties. Alternatively, one can supply
                  a list of dicts, where each dicts contains a case.
                  That is, an uncertainty name as key, and its value.
    :param callback: Class that will be called after finishing a
                     single experiment,
    :param reporting_interval: parameter for specifying the frequency with
                               which the callback reports the progress.
                               (Default is 100)
    :param modelKwargs: dictionary of keyword arguments to be passed to
                        model_init. (Default is None, which is treated as
                        an empty dictionary)
    :param kwargs: generic keyword arguments to pass on to callback
    :returns: a `structured numpy array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_
              containing the experiments, and a dict with the names of the
              outcomes as keys and an numpy array as value.
    :raises EMAError: if `cases` is neither an int nor a list, if `cases`
                      is an empty list, or if the model structures have no
                      outcomes in common.

    .. rubric:: suggested use

    In general, analysis scripts require both the structured array of the
    experiments and the dictionary of arrays containing the results. The
    recommended use is the following::

    >>> results = ensemble.perform_experiments(10000) #recommended use
    >>> experiments, output = ensemble.perform_experiments(10000) #will work fine

    The latter option will work fine, but most analysis scripts require
    to wrap it up into a tuple again::

    >>> data = (experiments, output)

    Another reason for the recommended use is that you can save this tuple
    directly::

    >>> import expWorkbench.util as util
    >>> util.save_results(results, file)

    """
    # a fresh dict per call; a shared ``{}`` default would leak any
    # mutation done by model_init into later calls
    if modelKwargs is None:
        modelKwargs = {}

    if isinstance(cases, types.IntType):
        cases, uncertainties = self._generate_cases(cases)
    elif isinstance(cases, types.ListType):
        if not cases:
            # the uncertainties are deduced from the first case, so an
            # empty list cannot be processed
            raise EMAError("no cases provided")
        sharedUncertainties = self.determine_intersecting_uncertainties()[0]
        uncertaintyNames = set(cases[0])
        uncertainties = [uncertainty for uncertainty in sharedUncertainties
                         if uncertainty.name in uncertaintyNames]
    else:
        raise EMAError("unknown type for cases")

    if not self._policies:
        self._policies.append({"name": "None"})

    nrOfExperiments = (len(cases) *
                       len(self._policies) *
                       len(self._modelStructures))
    info(str(nrOfExperiments) + " experiment will be executed")

    # set outcomes to the intersect of outcomes across models; the first
    # model seeds the set and all remaining models are intersected with it
    outcomesPerModel = [msi.outcomes for msi in self._modelStructures]
    outcomes = set(outcomesPerModel[0]).intersection(*outcomesPerModel[1:])
    if not outcomes:
        # checked before touching the models, so a failed call does not
        # leave them with an empty list of outcomes
        raise EMAError("no outcomes of interest defined")
    for msi in self._modelStructures:
        msi.outcomes = list(outcomes)

    # initialize the callback object
    callback = callback(uncertainties,
                        outcomes,
                        nrOfExperiments,
                        reporting_interval=reporting_interval,
                        **kwargs)

    if self.parallel:
        info("preparing to perform experiment in parallel")

        if not self._pool:
            self.__make_pool(modelKwargs)
        info("starting to perform experiments in parallel")

        results = self._pool.runExperiments(cases, self._policies)

        for entry in results:
            try:
                callback(*entry.get())
            except EMAParallelError as e:
                # logged and skipped; any other exception propagates
                exception(e)
    else:
        info("starting to perform experiments sequentially")

        def cleanup(modelInterfaces):
            for modelInterface in modelInterfaces:
                modelInterface.cleanup()

        for policy in self._policies:
            for msi in self._modelStructures:
                # the model gets its own copy of the policy
                policyToRun = copy.deepcopy(policy)
                try:
                    msi.model_init(policyToRun, modelKwargs)
                except (EMAError, NotImplementedError) as inst:
                    exception(inst)
                    cleanup(self._modelStructures)
                    raise

                for case in cases:
                    # the model gets its own copy of the case, the callback
                    # receives the original
                    caseToRun = copy.deepcopy(case)
                    try:
                        msi.run_model(caseToRun)
                    except CaseError as e:
                        # a failed case is only reported; its output is
                        # still retrieved and stored below
                        warning(str(e))
                    result = msi.retrieve_output()
                    msi.reset_model()
                    callback(case, policy, msi.name, result)
        cleanup(self._modelStructures)

    results = callback.get_results()
    info("experiments finished")

    return results

# def __optimize(self,
#                allele_order,
#                setOfAlleles,
#                obj_function,
#                nrOfGenerations,
#                nrOfPopMembers,
#                minimax,
#                crossoverRate,
#                mutationRate,
#                elitism,
#                reporting_interval,
#                population=BaseEMAPopulation):
#     # make a genome with a length equal to the list of alleles
#     genome = G1DList.G1DList(len(setOfAlleles))
#     genome.setParams(allele=setOfAlleles)
#
#     # The evaluator function (objective function)
#     # to be decided what to use as test function. In principle
#     # the test function is a function that transforms the genome
#     # to a case, runs the model, and returns the results
#     # ideally, we might remove that step entirely by not
#     # using ind.evaluate(**args) in the population...
# genome.evaluator.set(obj_function) # genome.crossover.set(Crossovers.G1DListCrossoverSinglePoint) # genome.mutator.set(Mutators.G1DListMutatorAllele) # genome.initializator.set(Initializators.G1DListInitializatorAllele) # # stats = StatisticsCallback(nrOfGenerations, nrOfPopMembers) # ga = EMAGA(genome, population) # ga.internalPop = population(genome, allele_order, self, reporting_interval) # ga.setMinimax(Consts.minimaxType[minimax]) # ga.stepCallback.set(stats) # ga.selector.set(EMAoptimization.EMARankSelector) # # if elitism: # ga.setElitism(True) # ga.setElitismReplacement(elitism) # # # a generation contains nrOfPopMembers individuals # ga.setPopulationSize(nrOfPopMembers) # # # there are nrOfGeneration generations # ga.setGenerations(nrOfGenerations) # # # crossover and mutation # ga.setCrossoverRate(crossoverRate) # ga.setMutationRate(mutationRate) # # # perform optimization, print every 10 generations # # ideally, we intercept these messages and redirect them to # # ema_logging. 
# ema_logging.info("starting optimization") # ga.evolve() # # # return results for best fit # best_individual = ga.bestIndividual() # # best_case = {} # for i, key in enumerate(allele_order): # best_case[key] = best_individual.genomeList[i] # # c = "" # for key, value in best_case.items(): # c += key # c += " : " # c += str(value) # c += '\n' # # info('best case:\n' + c ) # info('raw score: ' + str(best_individual.score)) # # results = {"best individual score": best_individual.score, # "best individual ": best_individual, # "stats": stats.stats, # "raw": stats.rawScore, # "fitness": stats.fitnessScore, # "mutation ration": mutationRate, # "crossover rate": crossoverRate, # "minimax": minimax, # "time elapsed": ga.get_time_elapsed()} # # return results # ## def perform_outcome_optimization(self, ## reporting_interval=100, ## obj_function=None, ## minimax = "maximize", ## nrOfGenerations = 100, ## nrOfPopMembers=100, ## crossoverRate = 0.5, ## mutationRate = 0.02, ## elitism = 0 ## ): ## """ ## Method responsible for performing the optimization. ## ## :param reporting_interval: Parameter for specifying the frequency with ## which the callback reports the progress. ## (Default = 100) ## :param obj_function: The objective function to use. This objective ## function receives the results for a single model ## run for all the specified outcomes of interest and ## should return a single score which should be ## positive. ## :param minimax: String indicating whether to minimize or maximize the ## obj_function. ## :param nrOfGenerations: The number of generations to evolve over. ## :param nrOfPopulationMembers: The number of population members in a ## single generation. ## :param crossoverRate: The crossover rate, between 0.0 and 1.0. ## see `wikipedia <http://en.wikipedia.org/wiki/Crossover_%28genetic_algorithm%29>`__ ## for details. (Default = 0.5) ## :param mutationRate: The mutation rate, between 0.0 and 1.0. 
## see `wikipedia <http://en.wikipedia.org/wiki/Mutation_%28genetic_algorithm%29>`__ ## for details. (Default = 0.02) ## :param elitism: The number of best individuals to copy to the next ## generation. (Default = 0) ## ## :returns: A dict with info on the optimization including stats, best ## individual, and information on the optimization setup ## ## """ ## ## # Genome instance ## setOfAlleles = GAllele.GAlleles() ## ## allele_order = [] ## # deduce the alleles from the overlapping set of model structure ## # uncertainties ## # the alleles should use the limits of uncertainty, and their dType ## # in case of categorical uncertainties, the transform to the ## # category is delegated to a later stage (to be decided) ## shared_uncertainties = self.determine_intersecting_uncertainties()[0] ## for uncertainty in shared_uncertainties: ## values = uncertainty.get_values() ## dist = uncertainty.dist ## ## if isinstance(uncertainty, CategoricalUncertainty): ## allele = GAllele.GAlleleList(uncertainty.categories) ## elif dist== INTEGER: ## allele = GAllele.GAlleleRange(values[0], values[1]) ## else: ## allele = GAllele.GAlleleRange(values[0], values[1], real=True) ## ## setOfAlleles.add(allele) ## allele_order.append(uncertainty.name) ## return self.__optimize(allele_order, ## setOfAlleles, obj_function, ## nrOfGenerations, nrOfPopMembers, minimax, ## crossoverRate, mutationRate, elitism, ## reporting_interval, ## population=OutcomeOptimizationPopulation) ## ## ## def perform_robust_optimization(self, ## cases, ## reporting_interval=100, ## obj_function=None, ## policy_levers={}, ## minimax="maximize", ## nrOfGenerations=100, ## nrOfPopMembers=100, ## crossoverRate=0.5, ## mutationRate=0.02, ## elitism=0 ## ): ## """ ## Method responsible for performing robust optimization. ## ## :param cases: In case of Latin Hypercube sampling and Monte Carlo ## sampling, cases specifies the number of cases to ## generate. 
In case of Full Factorial sampling, ## cases specifies the resolution to use for sampling ## continuous uncertainties. Alternatively, one can supply ## a list of dicts, where each dicts contains a case. ## That is, an uncertainty name as key, and its value. ## :param reporting_interval: Parameter for specifying the frequency with ## which the callback reports the progress. ## (Default = 100) ## :param obj_function: The objective function to use. This objective ## function receives the results for a policy and ## the provided cases for all the specified outcomes ## of interest and should return a single score which ## should be positive. ## :param policy_levers: A dictionary with model parameter names as key ## and a dict as value. The dict should have two ## fields: 'type' and 'values. Type is either ## list or range, and determines the appropriate ## allele type. Values are the parameters to ## be used for the specific allele. ## :param minimax: String indicating whether to minimize or maximize the ## obj_function. ## :param nrOfGenerations: The number of generations to evolve over. ## :param nrOfPopulationMembers: The number of population members in a ## single generation. ## :param crossoverRate: The crossover rate, between 0.0 and 1.0. ## see `wikipedia <http://en.wikipedia.org/wiki/Crossover_%28genetic_algorithm%29>`__ ## for details. (Default = 0.5) ## :param mutationRate: The mutation rate, between 0.0 and 1.0. ## see `wikipedia <http://en.wikipedia.org/wiki/Mutation_%28genetic_algorithm%29>`__ ## for details. (Default = 0.02) ## :param elitism: The number of best individuals to copy to the next ## generation. 
(Default = 0) ## ## :returns: A dict with info on the optimization including stats, best ## individual, and information on the optimization setup ## ## """ ## ## # Genome instance ## setOfAlleles = GAllele.GAlleles() ## allele_order = [] ## for key, value in policy_levers.items(): ## type_allele = value['type'] ## value = value['values'] ## if type_allele=='range': ## allele = GAllele.GAlleleRange(value[0], value[1], real=True) ## elif type_allele=='list': ## allele = GAllele.GAlleleList(value) ## else: ## raise EMAError("unknown allele type: possible types are range and list") ## ## setOfAlleles.add(allele) ## allele_order.append(key) ## ## RobustOptimizationPopulation.cases = cases ## return self.__optimize(allele_order, ## setOfAlleles, ## obj_function, ## nrOfGenerations, ## nrOfPopMembers, ## minimax, ## crossoverRate, ## mutationRate, ## elitism, ## reporting_interval, ## population=RobustOptimizationPopulation) ## ## def perform_maximin_optimization(self, ## reporting_interval=100, ## obj_function1=None, ## policy_levers={}, ## minimax1 = "minimize", ## minimax2 = "maximize", ## nrOfGenerations1 = 100, ## nrOfPopMembers1 = 100, ## crossoverRate1 = 0.5, ## mutationRate1 = 0.02, ## elitism1 = 0, ## nrOfGenerations2 = 100, ## nrOfPopMembers2 = 100, ## crossoverRate2 = 0.5, ## mutationRate2 = 0.02, ## elitism2 = 0 ## ): ## ## # Genome instance ## setOfAlleles = GAllele.GAlleles() ## allele_order = [] ## for key, value in policy_levers.items(): ## allele = GAllele.GAlleleRange(value[0], value[1], real=True) ## ## setOfAlleles.add(allele) ## allele_order.append(key) ## ## MaximinOptimizationPopulation.optimizationType = minimax2 ## MaximinOptimizationPopulation.nrOfGenerations = nrOfGenerations2 ## MaximinOptimizationPopulation.nrOfPopMembers = nrOfPopMembers2 ## MaximinOptimizationPopulation.crossoverRate = crossoverRate2 ## MaximinOptimizationPopulation.mutationRate = mutationRate2 ## MaximinOptimizationPopulation.elitism = elitism2 ## ## return 
self.__optimize(allele_order, ## setOfAlleles, ## obj_function1, ## nrOfGenerations1, ## nrOfPopMembers1, ## minimax1, ## crossoverRate1, ## mutationRate1, ## elitism1, ## reporting_interval, ## population=MaximinOptimizationPopulation)
def perform_experiments(self,
                        cases,
                        callback=DefaultCallback,
                        reporting_interval=100,
                        model_kwargs=None,
                        which_uncertainties=INTERSECTION,
                        which_outcomes=INTERSECTION,
                        **kwargs):
    """
    Method responsible for running the experiments on a structure. In case
    of multiple model structures, `which_outcomes` controls whether the
    intersection or the union of the outcomes of the various models is
    used.

    :param cases: In case of Latin Hypercube sampling and Monte Carlo
                  sampling, cases specifies the number of cases to
                  generate. In case of Full Factorial sampling,
                  cases specifies the resolution to use for sampling
                  continuous uncertainties. Alternatively, one can supply
                  a list of dicts, where each dicts contains a case.
                  That is, an uncertainty name as key, and its value.
    :param callback: Class that will be called after finishing a
                     single experiment,
    :param reporting_interval: parameter for specifying the frequency with
                               which the callback reports the progress.
                               (Default is 100)
    :param model_kwargs: dictionary of keyword arguments to be passed to
                         model_init. (Default is None, which is treated as
                         an empty dictionary)
    :param which_uncertainties: keyword argument for controlling whether,
                                in case of multiple model structure
                                interfaces, the intersection or the union
                                of uncertainties should be used.
                                (Default is intersection).
    :param which_outcomes: keyword argument for controlling whether,
                           in case of multiple model structure
                           interfaces, the intersection or the union
                           of outcomes should be used.
                           (Default is intersection).
    :param kwargs: generic keyword arguments to pass on to callback
    :returns: a `structured numpy array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_
              containing the experiments, and a dict with the names of the
              outcomes as keys and an numpy array as value.
    :raises EMAError: if `cases` is neither an int nor a list, or if it is
                      an empty list.
    :raises ValueError: if `which_outcomes` is neither UNION nor
                        INTERSECTION.

    .. rubric:: suggested use

    In general, analysis scripts require both the structured array of the
    experiments and the dictionary of arrays containing the results. The
    recommended use is the following::

    >>> results = ensemble.perform_experiments(10000) #recommended use
    >>> experiments, output = ensemble.perform_experiments(10000)

    The latter option will work fine, but most analysis scripts require
    to wrap it up into a tuple again::

    >>> data = (experiments, output)

    Another reason for the recommended use is that you can save this tuple
    directly::

    >>> import expWorkbench.util as util
    >>> util.save_results(results, filename)

    .. note:: The current implementation has a hard coded limit to the
              number of designs possible. This is set to 50.000 designs.
              If one want to go beyond this, set `self.max_designs` to
              a higher value.

    """
    # a fresh dict per call instead of a shared mutable default
    if model_kwargs is None:
        model_kwargs = {}

    if not self._policies:
        self._policies.append({"name": "None"})

    # identify the uncertainties and sample over them
    if isinstance(cases, types.IntType):
        sampled_unc, unc_dict = self._generate_samples(cases,
                                                       which_uncertainties)
        nr_of_exp = (self.sampler.deterimine_nr_of_designs(sampled_unc) *
                     len(self._policies) *
                     len(self._msis))
        experiments = self._generate_experiments(sampled_unc)
    elif isinstance(cases, types.ListType):
        if not cases:
            # the uncertainty names are taken from the first case, so an
            # empty list cannot be processed
            raise EMAError("no cases provided")
        unc_dict = self.determine_uncertainties()[1]
        unc_names = cases[0].keys()
        # only the keys are used below, to look up the uncertainties
        sampled_unc = {name: [] for name in unc_names}
        nr_of_exp = len(cases) * len(self._policies) * len(self._msis)
        experiments = self._generate_experiments(cases)
    else:
        raise EMAError("unknown type for cases")
    uncertainties = [unc_dict[unc] for unc in sorted(sampled_unc)]

    # identify the outcomes that are to be included
    overview_dict, element_dict = \
        self._determine_unique_attributes("outcomes")
    if which_outcomes == UNION:
        outcomes = element_dict.keys()
    elif which_outcomes == INTERSECTION:
        # the entry keyed by the names of all model interfaces
        outcomes = overview_dict[tuple([msi.name for msi in self._msis])]
        outcomes = [outcome.name for outcome in outcomes]
    else:
        raise ValueError("incomplete value for which_outcomes")

    info(str(nr_of_exp) + " experiment will be executed")

    # initialize the callback object
    callback = callback(uncertainties,
                        outcomes,
                        nr_of_exp,
                        reporting_interval=reporting_interval,
                        **kwargs)

    if self.parallel:
        info("preparing to perform experiment in parallel")

        if not self._pool:
            self._make_pool(model_kwargs)
        info("starting to perform experiments in parallel")

        self._pool.run_experiments(experiments, callback)
    else:
        info("starting to perform experiments sequentially")

        def cleanup(model_interfaces):
            for model_interface in model_interfaces:
                model_interface.cleanup()

        msi_initialization_dict = {}
        msis = {msi.name: msi for msi in self._msis}

        cwd = os.getcwd()
        try:
            for case_id, experiment in enumerate(experiments):
                policy = experiment.pop('policy')
                model_name = experiment.pop('model')

                # check whether we already initialized the model for this
                # policy
                init_key = (policy['name'], model_name)
                if init_key not in msi_initialization_dict:
                    try:
                        debug("invoking model init")
                        msis[model_name].model_init(
                            copy.deepcopy(policy),
                            copy.deepcopy(model_kwargs))
                    except (EMAError, NotImplementedError) as inst:
                        exception(inst)
                        cleanup(self._msis)
                        raise
                    except Exception:
                        exception("some exception occurred when invoking the init")
                        cleanup(self._msis)
                        raise
                    debug("initialized model %s with policy %s" %
                          (model_name, policy['name']))

                    # always, only a single initialized msi instance
                    msi_initialization_dict = {init_key: msis[model_name]}
                msi = msis[model_name]

                # the model gets its own copy, the callback receives the
                # experiment itself
                case = copy.deepcopy(experiment)
                try:
                    debug("trying to run model")
                    msi.run_model(case)
                except CaseError as e:
                    # a failed case is only reported; its output is still
                    # retrieved and stored below
                    warning(str(e))

                debug("trying to retrieve output")
                result = msi.retrieve_output()

                debug("trying to reset model")
                msi.reset_model()
                callback(case_id, experiment, policy, msi.name, result)
            cleanup(self._msis)
        finally:
            # restore the working directory saved before the run, also
            # when an exception aborts the run
            os.chdir(cwd)

    results = callback.get_results()
    info("experiments finished")

    return results
def perform_regret_analysis(results,
                            policyOfInterest,
                            uncertainty1,
                            uncertainty2,
                            resolution,
                            outcomeNames=None):
    '''
    perform a RAND-style regret analysis. That is, calculate regret across
    all runs. Regret is here understood as the regret of the policy of
    interest as compared to the best performing other policy. Identify the
    case in which the regret is maximized. Show a 2-d slice across two
    specified uncertainties, which contains the case where the regret is
    maximized. So, in this slice all the uncertainties apart from the 2
    specified, are equal to their value in the case were the regret is
    maximized.

    Function requires a full factorial sampling as the experimental design
    to work.

    input:
    results             default returnValue from
                        modelEnsemble.runExperiments()
    policyOfInterest    name of policy for which you want to calculate the
                        regret
    uncertainty1        the uncertainty across which you want to slice
    uncertainty2        the uncertainty across which you want to slice
    resolution          resolution used in generating the full factorial
    outcomeNames        if provided, this should be a list of names of
                        outcomes where high is bad. The normalized results
                        for these outcomes will be reverted. Defaults to
                        an empty list.

    NOTE: please provide the actual uncertainty, not their name

    returns:
    regretPlot          2-d numpy array; the first axis follows
                        uncertainty1, the second axis uncertainty2. Each
                        entry is the maximum of the regret stored for the
                        corresponding case in the slice.

    raises:
    KeyError            if a case in the slice is not among the cases for
                        which regret was calculated
    '''
    # a fresh list per call instead of a shared mutable default
    if outcomeNames is None:
        outcomeNames = []

    def getIndex(valueRange, nrOfCells, value):
        '''
        helper function to transform a case to an index in the
        regretPlotArray
        '''
        # float division, so integer valued uncertainties are not floored
        # before the caller rounds the result
        span = float(valueRange[1] - valueRange[0])
        return ((nrOfCells - 1) * (value - valueRange[0])) / span

    regret, cases, uncertainties = calculate_regret(results,
                                                    policyOfInterest,
                                                    outcomeNames)

    # transform regret into a dictionary for quick lookup
    regretDict = dict(zip(cases, regret))

    # identify maximum regret case; only the case itself is needed here
    _, case = max_regret(regret, cases)

    # generate the cases that should be in the slice
    #
    # by generating the cases we need for the slice here
    # and combining it with the dict structure, we can fill the
    # slice up quickly
    #
    # another alternative approach would be to filter the available cases
    # based on the case that maximizes the regret. Only the specified
    # uncertainties should be allowed to vary. This, however, would require
    # us to go over the entire list of cases which can potentially become
    # very slow
    #
    sampler = FullFactorialSampler()
    designs = list(sampler.generate_design([uncertainty1, uncertainty2],
                                           resolution)[0])

    # get the indexes of the uncertainties
    # we use the max regret case and only modify the entries for
    # the uncertainties across which we want to slice
    index1 = uncertainties.index(uncertainty1.name)
    index2 = uncertainties.index(uncertainty2.name)

    # deduce the shape of the slice
    if len(designs) < resolution**2:
        # fewer designs than a full resolution x resolution grid: count
        # the distinct values actually present along each axis
        designArray = np.asarray(designs)
        shape = (len(set(designArray[:, 0])),
                 len(set(designArray[:, 1])))
    else:
        shape = (resolution, resolution)

    regretPlot = np.zeros(shape)

    case = list(case)
    for design in designs:
        case[index1] = design[0]
        case[index2] = design[1]

        # map case values back to index in regretPlot
        i = int(round(getIndex(uncertainty1.get_values(),
                               regretPlot.shape[0],
                               design[0]), 0))
        j = int(round(getIndex(uncertainty2.get_values(),
                               regretPlot.shape[1],
                               design[1]), 0))

        # retrieve regret for particular case; subscript lookup, so a
        # missing case actually raises the KeyError handled here
        try:
            caseRegret = regretDict[tuple(case)]
        except KeyError:
            ema_logging.exception('case not found')
            raise
        regretPlot[i, j] = np.max(caseRegret)

    return regretPlot