def apply_function(self):
    """Fill every container's ``'weights'`` with a histogram in ``apply_mode``.

    Two calculation modes are handled; any other ``self.calc_mode`` leaves
    the containers untouched:

    * ``self.calc_mode`` is a ``MultiDimBinning``: the weights read in that
      representation are multiplied (``@``) with the container's
      ``'hist_transform'`` entry.
    * ``self.calc_mode == 'events'``: the event arrays named by
      ``self.apply_mode.names`` are histogrammed with ``histogram`` using
      ``averaged=False``.

    When ``self.error_method == 'sumw2'`` the same projection is applied to
    the squared weights and its square root is stored as ``'errors'``.
    """
    if isinstance(self.calc_mode, MultiDimBinning):
        for container in self.data:
            container.representation = self.calc_mode
            weights = container['weights']
            transform = container['hist_transform']

            # Collect everything to be stored before leaving the calc
            # representation; insertion order fixes the write order.
            binned = {'weights': weights @ transform}
            if self.error_method == 'sumw2':
                binned['errors'] = np.sqrt(np.square(weights) @ transform)

            container.representation = self.apply_mode
            for out_key, out_values in binned.items():
                container[out_key] = out_values

    elif self.calc_mode == 'events':
        for container in self.data:
            container.representation = self.calc_mode
            sample = [container[name] for name in self.apply_mode.names]
            weights = container['weights']

            binned = {
                'weights': histogram(
                    sample, weights, self.apply_mode, averaged=False
                ),
            }
            if self.error_method == 'sumw2':
                sum_of_squares = histogram(
                    sample, np.square(weights), self.apply_mode, averaged=False
                )
                binned['errors'] = np.sqrt(sum_of_squares)

            container.representation = self.apply_mode
            for out_key, out_values in binned.items():
                container[out_key] = out_values
def apply_function(self):
    """Fill every container's ``'weights'`` with a histogram in ``apply_mode``.

    * If ``self.calc_mode`` is a ``MultiDimBinning``, the weights read in
      that representation are multiplied (``@``) with the container's
      ``'hist_transform'`` entry.
    * If ``self.calc_mode == 'events'``, the events are histogrammed with
      ``histogram`` on ``self.regularized_apply_mode``. Dimensions that are
      logarithmic and not irregular in ``self.apply_mode`` are read through
      the ``"log_events"`` representation, all others through ``"events"``.
    * Any other ``calc_mode`` leaves the containers untouched.

    When ``self.error_method == 'sumw2'`` the squared weights are projected
    the same way and the square root is stored as ``'errors'``.
    """
    if isinstance(self.calc_mode, MultiDimBinning):
        for container in self.data:
            container.representation = self.calc_mode
            weights = container['weights']
            transform = container['hist_transform']
            hist = weights @ transform
            if self.error_method == 'sumw2':
                sumw2 = np.square(weights) @ transform

            container.representation = self.apply_mode
            container['weights'] = hist
            if self.error_method == 'sumw2':
                container['errors'] = np.sqrt(sumw2)

    elif self.calc_mode == 'events':
        # Which representation each dimension is read through depends only
        # on the binning, so work it out once instead of per container.
        read_as_log = [
            dim.is_log and not dim.is_irregular for dim in self.apply_mode
        ]

        for container in self.data:
            sample = []
            for dim, as_log in zip(self.regularized_apply_mode, read_as_log):
                container.representation = "log_events" if as_log else "events"
                sample.append(container[dim.name])

            # Switch back to the calculation representation before reading
            # the weights. Otherwise they would be fetched through whatever
            # representation the last dimension left active, i.e.
            # "log_events" whenever the last dimension is logarithmic.
            container.representation = self.calc_mode
            weights = container['weights']

            # The hist is computed using a binning that is completely
            # linear and regular.
            hist = histogram(
                sample, weights, self.regularized_apply_mode, averaged=False
            )
            if self.error_method == 'sumw2':
                sumw2 = histogram(
                    sample,
                    np.square(weights),
                    self.regularized_apply_mode,
                    averaged=False,
                )

            container.representation = self.apply_mode
            container['weights'] = hist
            if self.error_method == 'sumw2':
                container['errors'] = np.sqrt(sumw2)
def setup_function(self):
    """Pre-compute the per-container ``'hist_transform'`` for binned calc mode.

    ``self.apply_mode`` must be a ``MultiDimBinning``. Nothing else happens
    unless ``self.calc_mode`` is a ``MultiDimBinning`` as well; in that case
    the two binnings must not share any dimension name, and for every
    container the events are histogrammed (unweighted, ``averaged=False``)
    in the combined binning. The result is reshaped so that the leading
    axes follow ``self.calc_mode.shape`` and the remaining bins are
    flattened into one trailing axis, then stored as ``'hist_transform'``.
    """
    assert isinstance(
        self.apply_mode, MultiDimBinning
    ), "Hist stage needs a binning as `apply_mode`, but is %s" % self.apply_mode

    if not isinstance(self.calc_mode, MultiDimBinning):
        return

    # The two binnings must be exclusive.
    assert set(self.calc_mode.names).isdisjoint(self.apply_mode.names)
    joint_binning = self.calc_mode + self.apply_mode

    for container in self.data:
        # The transform is built from the raw events. Note that the
        # representation is switched on `self.data`, not on the container.
        self.data.representation = "events"
        coords = [container[name] for name in joint_binning.names]
        counts = histogram(coords, None, joint_binning, averaged=False)
        transform = counts.reshape((*self.calc_mode.shape, -1))

        self.data.representation = self.calc_mode
        container['hist_transform'] = transform
def array_to_binned(self, key, binning, averaged=True):
    """Histogram an array entry and register the result as binned data.

    Parameters
    ----------
    key : str
        Entry of ``self.array_data`` used as the histogram weights; the
        result is stored under the same key via ``self.add_binned_data``.
    binning : MultiDimBinning
        Target binning. Its ``names`` select the entries of
        ``self.array_data`` used as sample coordinates.
    averaged : bool
        Forwarded to ``histogram``. If True, the histogram entries are
        averages of the numbers that end up in a given bin. This must be
        used e.g. when oscillation probabilities are translated; otherwise
        the result is probability*count per bin.

    Notes
    -----
    Right now CPU only.

    TODO: make work for n-dim
    """
    logging.debug('Transforming %s array to binned data' % (key))

    arrays = self.array_data
    bin_weights = arrays[key]
    coords = [arrays[dim_name] for dim_name in binning.names]

    binned = histogram(coords, bin_weights, binning, averaged)
    self.add_binned_data(key, (binning, binned))
def array_to_binned(self, key, src_representation, dest_representation):
    """Histogram the array stored under `key` into a binning.

    For a binning that is not irregular, logarithmic dimensions are read
    through the ``"log_events"`` representation and histogrammed in a
    replacement ``OneDimBinning`` whose domain is the log of the original
    domain; all other dimensions are read through ``"events"`` and keep
    their binning. An irregular binning is used as is, with the sample read
    in `src_representation`.

    The weights are always read in `src_representation`, which is also the
    representation left active on ``self`` when the method returns. The
    histogram is computed with ``averaged=True``.

    Parameters
    ----------
    key : str
        Name of the array used as histogram weights.
    src_representation : str
        Must be one of ``self.array_representations``.
    dest_representation : MultiDimBinning
        Binning to histogram into.

    Returns
    -------
    hist
        Whatever ``histogram`` returns for the sample, weights and binning.

    Notes
    -----
    Right now, CPU-only.
    """
    # TODO: make work for n-dim
    logging.trace('Transforming %s array to binned data' % (key))

    assert src_representation in self.array_representations
    assert isinstance(dest_representation, MultiDimBinning)

    if dest_representation.is_irregular:
        self.representation = src_representation
        sample = [self[name] for name in dest_representation.names]
        hist_binning = dest_representation
    else:
        sample = []
        dimensions = []
        for d in dest_representation:
            log_dim = d.is_log
            self.representation = "log_events" if log_dim else "events"
            sample.append(self[d.name])
            if log_dim:
                # Linear binning in the logarithm of the variable.
                d = OneDimBinning(
                    d.name, domain=np.log(d.domain.m), num_bins=d.num_bins
                )
            dimensions.append(d)
        hist_binning = MultiDimBinning(dimensions)

    # The weights themselves always come from the source representation.
    self.representation = src_representation
    return histogram(sample, self[key], hist_binning, averaged=True)
def setup_function(self):
    """Prepare the histogramming according to ``self.calc_mode``.

    ``self.apply_mode`` must be a ``MultiDimBinning``.

    * ``calc_mode`` is a ``MultiDimBinning``: it must share no dimension
      name with ``apply_mode``. For every container the events are
      histogrammed (unweighted, ``averaged=False``) in the combined binning
      and the result, reshaped to ``calc_mode.shape + (-1,)``, is stored as
      ``'hist_transform'``.
    * ``calc_mode == "events"``: a regular, linear stand-in for
      ``apply_mode`` is built and stored as ``self.regularized_apply_mode``.
      Irregular dimensions are replaced by a binning in the pre-computed
      bin index (stored per container under
      ``<dim>__<apply_mode name>_idx``), logarithmic dimensions by a linear
      binning in the logarithm, and all others are kept.

    Raises
    ------
    ValueError
        If ``calc_mode`` is neither a ``MultiDimBinning`` nor ``"events"``.
    """
    assert isinstance(
        self.apply_mode, MultiDimBinning
    ), "Hist stage needs a binning as `apply_mode`, but is %s" % self.apply_mode

    if isinstance(self.calc_mode, MultiDimBinning):
        # The two binnings must be exclusive.
        assert set(self.calc_mode.names).isdisjoint(self.apply_mode.names)
        joint_binning = self.calc_mode + self.apply_mode

        for container in self.data:
            # The transform is built from the raw events. Note that the
            # representation is switched on `self.data`, not the container.
            self.data.representation = "events"
            coords = [container[name] for name in joint_binning.names]
            counts = histogram(coords, None, joint_binning, averaged=False)
            transform = counts.reshape((*self.calc_mode.shape, -1))

            self.data.representation = self.calc_mode
            container['hist_transform'] = transform
        return

    if self.calc_mode != "events":
        raise ValueError(f"unknown calc mode: {self.calc_mode}")

    regular_dims = []
    for dim in self.apply_mode:
        if dim.is_irregular:
            # Replace the axis by a regular one in the digitized variable.
            index_name = dim.name + "__" + self.apply_mode.name + "_idx"
            regular_dims.append(
                OneDimBinning(
                    index_name,
                    domain=[0, dim.num_bins],
                    num_bins=dim.num_bins,
                )
            )
            for container in self.data:
                container.representation = "events"
                values = container[dim.name] * dim.units
                # Index of the bin each sample falls into; the shift by -1
                # gives samples below the binning range the index -1.
                bin_idx = (
                    np.searchsorted(dim.bin_edges, values, side="right") - 1
                )
                # Like numpy histogramming, count samples sitting exactly on
                # the uppermost edge in the highest bin rather than treating
                # them as outliers.
                bin_idx[values == dim.bin_edges[-1]] -= 1
                container[index_name] = bin_idx
        elif dim.is_log:
            # Only the binning is made logarithmic here; the log of the
            # variable itself is taken later in `apply_function` through
            # the representation mechanism.
            regular_dims.append(
                OneDimBinning(
                    dim.name,
                    domain=np.log(dim.domain.m),
                    num_bins=dim.num_bins,
                )
            )
        else:
            regular_dims.append(dim)

    self.regularized_apply_mode = MultiDimBinning(regular_dims)
    logging.debug("Using regularized binning:\n" + str(self.regularized_apply_mode))
def apply_function(self):
    """Fill every container's ``'weights'`` with a histogram in ``apply_mode``.

    The weights that get histogrammed are ``container["weights"]``, plus
    ``container["astro_weights"]`` when the container has that key.

    * If ``self.calc_mode`` is a ``MultiDimBinning``, these weights are
      multiplied (``@``) with the container's ``"hist_transform"`` entry.
    * If ``self.calc_mode == "events"``, the events are histogrammed with
      ``histogram`` on ``self.regularized_apply_mode``. Dimensions that are
      logarithmic and not irregular in ``self.apply_mode`` are read through
      the ``"log_events"`` representation, all others through ``"events"``.
      With ``self.unweighted`` set, every event counts with weight one.
    * Any other ``calc_mode`` leaves the containers untouched.

    When ``self.error_method == "sumw2"`` the squared weights are projected
    the same way and the square root is stored as ``"errors"``.

    Raises
    ------
    NotImplementedError
        If ``self.unweighted`` is set while ``calc_mode`` is a binning.
    """
    if isinstance(self.calc_mode, MultiDimBinning):
        if self.unweighted:
            raise NotImplementedError(
                "Unweighted hist only implemented in event-wise calculation"
            )
        for container in self.data:
            container.representation = self.calc_mode
            weights = container["weights"]
            if "astro_weights" in container.keys:
                weights = weights + container["astro_weights"]
            transform = container["hist_transform"]

            hist = weights @ transform
            if self.error_method == "sumw2":
                sumw2 = np.square(weights) @ transform

            container.representation = self.apply_mode
            container["weights"] = hist
            if self.error_method == "sumw2":
                container["errors"] = np.sqrt(sumw2)

    elif self.calc_mode == "events":
        # Which representation each dimension is read through depends only
        # on the binning, so work it out once instead of per container.
        read_as_log = [
            dim.is_log and not dim.is_irregular for dim in self.apply_mode
        ]

        for container in self.data:
            sample = []
            for dim, as_log in zip(self.regularized_apply_mode, read_as_log):
                container.representation = "log_events" if as_log else "events"
                sample.append(container[dim.name])

            # Switch back to the calculation representation before reading
            # the weights. Otherwise they would be fetched through whatever
            # representation the last dimension left active, i.e.
            # "log_events" whenever the last dimension is logarithmic.
            container.representation = self.calc_mode
            weights = container["weights"]
            if "astro_weights" in container.keys:
                weights = weights + container["astro_weights"]
            if self.unweighted:
                # Plain event counts: same shape and dtype, all ones.
                weights = np.ones_like(weights)

            # The hist is computed using a binning that is completely
            # linear and regular.
            hist = histogram(
                sample, weights, self.regularized_apply_mode, averaged=False
            )
            if self.error_method == "sumw2":
                sumw2 = histogram(
                    sample,
                    np.square(weights),
                    self.regularized_apply_mode,
                    averaged=False,
                )

            container.representation = self.apply_mode
            container["weights"] = hist
            if self.error_method == "sumw2":
                container["errors"] = np.sqrt(sumw2)