def __init__(self, gui=False, thd=False):
    '''
    Create a link with netlogo. Underneath, the netlogo jvm is started
    through jpype, unless a jvm is already running.

    :param gui: boolean, if true run netlogo with gui, otherwise run in
                headless mode. Defaults to false. On mac the gui request
                is ignored and netlogo always runs headless.
    :param thd: boolean, if true start netlogo in 3d mode. Defaults to
                false.

    '''
    on_mac = sys.platform == 'darwin'

    if not jpype.isJVMStarted():
        # jars that ship with the netlogo installation
        netlogo_jars = (r'/lib/scala-library.jar',
                        r'/lib/asm-all-3.3.1.jar',
                        r'/lib/picocontainer-2.13.6.jar',
                        r'/lib/log4j-1.2.16.jar',
                        r'/lib/jmf-2.1.1e.jar',
                        r'/lib/pegdown-1.1.0.jar',
                        r'/lib/parboiled-core-1.0.2.jar',
                        r'/lib/parboiled-java-1.0.2.jar',
                        r'/lib/mrjadapter-1.2.jar',
                        r'/lib/jhotdraw-6.0b1.jar',
                        r'/lib/quaqua-7.3.4.jar',
                        r'/lib/swing-layout-7.3.4.jar',
                        r'/lib/jogl-1.1.1.jar',
                        r'/lib/gluegen-rt-1.1.1.jar',
                        r'/NetLogo.jar')
        classpath = [NETLOGO_HOME + suffix for suffix in netlogo_jars]

        # the java side of the link itself ships with pynetlogo
        classpath.append(PYNETLOGO_HOME +
                         r'/external_files/netlogoLink.jar')

        # put the jars in the format the java virtual machine expects
        # TODO the use of the jre here is only relevant under windows
        # apparently
        # might be solvable by setting netlogo home user.dir
        class_path_option = '-Djava.class.path={}'.format(
                                        jar_separator.join(classpath))

        jpype.startJVM(jpype.getDefaultJVMPath(), class_path_option,
                       "-Xms128M", "-Xmx1024m")

        system = jpype.java.lang.System
        system.setProperty('user.dir', NETLOGO_HOME)
        if on_mac:
            system.setProperty("java.awt.headless", "true")

        debug("jvm started")

    link_class = jpype.JClass('netlogoLink.NetLogoLink')
    debug('NetLogoLink class found')

    # a gui request cannot be honoured on mac, fall back to headless
    if on_mac and gui:
        info('on mac only headless mode is supported')
        gui = False

    self.link = link_class(gui, thd)
    debug('NetLogoLink class instantiated')
def find_box(self):
    '''
    Execute one iteration of the PRIM algorithm, starting from the
    current state of Prim: a single box is peeled and pasted out of the
    data that is still remaining.

    :return: the box that was found if its mean meets the threshold, a
             dump box made from all the remaining data if it does not,
             or None when no data is remaining. Any box that is returned
             has also been appended to self.boxes.

    '''
    # set the indices
    self._update_yi_remaining()

    nr_remaining = self.yi_remaining.shape[0]
    if nr_remaining == 0:
        info("no data remaining")
        return

    # log how much data and how many coi are remaining
    info(self.message.format(nr_remaining,
                             self.determine_coi(self.yi_remaining)))

    # start from a box containing all the remaining data points and
    # run the peeling phase followed by the pasting phase
    candidate = PrimBox(self, self.box_init, self.yi_remaining[:])
    candidate = self._peel(candidate)
    debug("peeling completed")
    candidate = self._paste(candidate)
    debug("pasting completed")

    summary = ("mean: {0}, mass: {1}, coverage: {2}, density: {3} "
               "restricted_dimensions: {4}").format(candidate.mean[-1],
                                                    candidate.mass[-1],
                                                    candidate.coverage[-1],
                                                    candidate.density[-1],
                                                    candidate.res_dim[-1])

    # the direction of the comparison depends on the threshold type
    meets_threshold = ((self.threshold_type == ABOVE) &
                       (candidate.mean[-1] >= self.threshold)) or \
                      ((self.threshold_type == BELOW) &
                       (candidate.mean[-1] <= self.threshold))

    if meets_threshold:
        info(summary)
        result = candidate
    else:
        # make a dump box
        info('box does not meet threshold criteria, value is {}, returning dump box'.format(candidate.mean[-1]))
        result = PrimBox(self, self.box_init, self.yi_remaining[:])

    self.boxes.append(result)
    return result
def __filter(boxes, uncertainties=None):
    '''
    Determine which uncertainties are restricted by at least one box.

    The last entry of boxes is used as the dump box, that is, the
    reference against which the limits of all the other boxes are
    compared. Uncertainties that are not restricted by any box are left
    out, and their names are logged.

    :param boxes: sequence of boxes, the last one being the dump box.
                  Each box is indexed by uncertainty name, with element 0
                  holding the lower limit and element 1 the upper limit,
                  and exposes a numpy dtype with named fields.
    :param uncertainties: optional list of uncertainty names to consider.
                          If None or empty, all the field names of the
                          dump box are considered.
    :return: list with the names of the restricted uncertainties, in the
             order in which they were considered.

    '''
    dump_box = boxes[-1]
    candidate_boxes = boxes[:-1]

    if uncertainties:
        names_to_check = uncertainties
    else:
        names_to_check = [entry[0] for entry in dump_box.dtype.descr]

    def restricts(box, name):
        # True if the limits of box on name differ from the dump box
        lower = box[name][0]
        upper = box[name][1]

        if box.dtype.fields.get(name)[0] == 'object':
            # object fields are compared on their first entry only: a
            # different length or any differing element is a restriction
            reference = dump_box[name][0]
            if len(reference) != len(lower):
                return True
            return not np.all(np.equal(reference, lower))

        return (lower > dump_box[name][0]) or (upper < dump_box[name][1])

    restricted = [name for name in names_to_check
                  if any(restricts(box, name) for box in candidate_boxes)]

    # only report when something is actually left out, so that no
    # message with an empty list of names ends up in the log
    unrestricted = sorted(set(names_to_check) - set(restricted))
    if unrestricted:
        info(", ".join(unrestricted) +
             " are not visualized because they are not restricted")

    return restricted
def perform_prim(results,
                 classify,
                 peel_alpha=0.05,
                 paste_alpha=0.05,
                 mass_min=0.05,
                 threshold=None,
                 pasting=True,
                 threshold_type=1,
                 obj_func=def_obj_func):
    r'''
    Perform the Patient Rule Induction Method (PRIM).

    This function performs the PRIM algorithm on the data. It uses a
    Python implementation of PRIM inspired by the
    `R <http://www.oga-lab.net/RGM2/func.php?rd_id=prim:prim-package>`_
    algorithm. Compared to the R version, the Python version is data type
    aware. That is, real valued, ordinal, and categorical data are treated
    differently. Moreover, the pasting phase of PRIM in the R algorithm is
    not consistent with the literature. The Python version is.

    The PRIM algorithm tries to find subspaces of the input space that
    share some characteristic in the output space. The characteristic that
    the current implementation looks at is the mean of the results. Thus,
    the output space is 1-D, and the characteristic is assumed to be
    continuous.

    As a work around, to deal with classes, the user can supply a classify
    function. This function should return a binary classification
    (i.e. 1 or 0). Then, the mean of the box is indicative of the
    concentration of cases of class 1. That is, if the specified threshold
    is say 0.8 and the threshold_type is 1, PRIM looks for subspaces of
    the input space that contains at least 80\% cases of class 1.

    :param results: the return from :meth:`perform_experiments`.
    :param classify: either a string denoting the outcome of interest to
                     use or a function. In case of a string and time
                     series data, the end state is used.
    :param peel_alpha: parameter controlling the peeling stage
                       (default = 0.05).
    :param paste_alpha: parameter controlling the pasting stage
                        (default = 0.05).
    :param mass_min: minimum mass of a box (default = 0.05).
    :param threshold: the threshold of the output space that boxes should
                      meet. Has to be specified if classify is a string.
    :param pasting: perform pasting stage (default=True)
    :param threshold_type: If 1, the boxes should go above the threshold,
                           if -1 the boxes should go below the threshold,
                           if 0, the algorithm looks for both +1 and -1.
    :param obj_func: The objective function to use. Default is
                     :func:`def_obj_func`
    :return: a list of PRIM objects.
    :raises EMAError: if classify is neither a string nor a callable, or
                      if classify is a string and no threshold is given.

    for each box, the scenario discovery metrics *coverage* and *density*
    are also calculated:

    .. math::

        coverage=\frac
                    {{\displaystyle\sum_{y_{i}\in{B}}y_{i}{'}}}
                    {{\displaystyle\sum_{y_{i}\in{B^I}}y_{i}{'}}}

    where :math:`y_{i}{'}=1` if :math:`x_{i}\in{B}` and :math:`y_{i}{'}=0`
    otherwise.

    .. math::

        density=\frac
                    {{\displaystyle\sum_{y_{i}\in{B}}y_{i}{'}}}
                    {{\displaystyle\left|{y_{i}}\right|\in{B}}}

    where :math:`y_{i}{'}=1` if :math:`x_{i}\in{B}` and :math:`y_{i}{'}=0`
    otherwise, and :math:`{\displaystyle\left|{y_{i}}\right|\in{B}}` is the
    cardinality of :math:`y_{i}`.

    Density is the ratio of the cases of interest in a box to the total
    number of cases in that box. *density* is identical to the mean in
    case of a binary classification. For more detail on these metrics see
    `Bryant and Lempert (2010) <http://www.sciencedirect.com/science/article/pii/S004016250900105X>`_

    .. rubric:: references to relevant papers

    * `original PRIM paper <http://www.springerlink.com/content/x3gpv05t34620878/>`_
    * `paper on ordinal data and PRIM <http://www.sciencedirect.com/science/article/pii/S095741740700231X>`_

    **ema application**

    * `Lempert et al. (2006) <http://mansci.journal.informs.org/content/52/4/514>`_
    * `Groves and Lempert (2007) <http://www.sciencedirect.com/science/article/pii/S0959378006000896#ref_bib19>`_
    * `Bryant and Lempert (2010) <http://www.sciencedirect.com/science/article/pii/S004016250900105X>`_

    '''
    experiments, outcomes = results

    # make y
    if isinstance(classify, StringType):
        if threshold is None:
            # without this check the comparison below fails on None * int
            # with an error that does not point at the missing argument
            raise EMAError("a threshold has to be specified when classify is a string")

        outcome = outcomes.get(classify)

        # in case of time series data, the end state is used
        if len(outcome.shape) == 2:
            y = outcome[:, -1]
        else:
            y = outcome

        # multiplying both sides by threshold_type flips the comparison
        # for threshold_type == -1
        # NOTE(review): for threshold_type == 0 both sides are multiplied
        # by zero, so nothing is ever counted as a case of interest here
        count = np.zeros(y.shape)
        count[y*threshold_type > threshold*threshold_type] = 1
        cases_of_interest = np.sum(count)
    elif callable(classify):
        y = classify(outcomes)
        cases_of_interest = np.sum(y)
    else:
        raise EMAError("incorrect specification of classify, this should be a function or a string")

    info("number of cases of interest is %d" % (cases_of_interest))

    # perform prim
    boxes = recursivePrim.perform_prim(experiments, y,
                                       box_init=None,
                                       peel_alpha=peel_alpha,
                                       paste_alpha=paste_alpha,
                                       mass_min=mass_min,
                                       threshold=threshold,
                                       pasting=pasting,
                                       threshold_type=threshold_type,
                                       obj_func=obj_func,
                                       cases_of_interest=cases_of_interest)

    # calculate scenario discovery metrics and add these to boxes
    boxes = calculate_sd_metrics(boxes, y, threshold, threshold_type)

    # return prim
    return boxes