def _count(process, region):
    """Computes the weighted event count of a process in a region.

    Args:
        process: The process whose events should be counted
        region: The region whose weighting/selection should be applied

    Returns:
        The weighted event count in the region.
    """
    # Extract the selection and weight expressions for the region
    selection, weight = region.selection_weight()

    # Gather every property referenced by either expression so that a
    # single load covers both evaluations
    needed_properties = set(properties(selection)).union(properties(weight))

    # Load the process data for those properties
    data = process.load(needed_properties)

    # Restrict to events passing the selection, if one was given
    if selection != '':
        data = data[data.eval(normalized(selection))]

    # Unweighted regions simply count rows; weighted regions sum the
    # evaluated per-event weights
    if weight == '':
        return len(data)
    return data.eval(normalized(weight)).sum()
def __init__(self, selection):
    """Initializes a new instance of the Filter class.

    Args:
        selection: The selection expression to apply to the process data
    """
    # Normalize the expression once, up front, so later evaluations can
    # use the stored form directly
    normalized_selection = normalized(selection)
    self._selection = normalized_selection
def __init__(self, weight):
    """Initializes a new instance of the Reweighted class.

    Args:
        weight: The weight expression to incorporate into the region
    """
    # Normalize the weight expression once and keep the stored form
    normalized_weight = normalized(weight)
    self._weight = normalized_weight
def _histogram(process, region, expressions, binnings, load_hints = None):
    """Generates a ROOT histogram of a distribution of a process in a region.

    Args:
        process: The process whose events should be histogrammed
        region: The region whose weighting/selection should be applied
        expressions: A tuple of expression strings, one per histogram axis
        binnings: A tuple of Binning instances, one per histogram axis
        load_hints: If provided, this argument will hint to _histogram that
            it should load additional properties when loading data and that
            it should use the _caching_loader.  This facilitates cached
            loading of data across multiple calls to _histogram with the
            same process.  This is particularly useful for parallelized
            histogramming, where the jobs are grouped by process.

    Returns:
        A ROOT histogram, of the TH1F, TH2F, or TH3F variety.

    Raises:
        ValueError: If more than 3 expressions are provided, since ROOT
            only supports 1-3 dimensional histograms.
    """
    # Compute weighted selection
    selection, weight = region.selection_weight()

    # Expand binnings to edge lists
    edges = tuple(b.edges() for b in binnings)

    # Load data
    if load_hints is not None:
        # If load_hints have been provided, just use those with the
        # _caching_loader
        data = _caching_loader(process, load_hints)
    else:
        # Otherwise manually create the set of necessary properties
        # NOTE: All we need to do are region and expression properties -
        # patch properties are handled internally by the process
        required_properties = set()

        # Add those properties necessary to evaluate region selection/weight
        required_properties.update(properties(selection))
        required_properties.update(properties(weight))

        # Add in those properties necessary to evaluate expressions
        required_properties.update(*(properties(e) for e in expressions))

        # Load data
        data = process.load(required_properties)

    # Apply selection if specified
    if selection != '':
        data = data[data.eval(normalized(selection))]

    # Evaluate each variable expression, converting the resultant Pandas
    # Series to a NumPy array
    # HACK: TH1::FillN only supports 64-bit floating point values, so
    # convert things.  Would be nice to find a better approach.
    samples = tuple(data.eval(normalized(e)).values.astype(numpy.float64)
                    for e in expressions)

    # Evaluate weights, converting the resultant Pandas Series to a NumPy
    # array
    # HACK: TH1::FillN only supports 64-bit floating point values, so
    # convert things.  Would be nice to find a better approach.
    if weight != '':
        weights = data.eval(normalized(weight)).values.astype(numpy.float64)
    else:
        weights = nullptr

    # Create a unique name and title for the histogram
    name = title = uuid4().hex

    # Create a histogram based on dimensionality
    # NOTE: When specifying explicit bin edges, you aren't passing a length
    # argument, you are passing an nbins argument, which is length - 1,
    # hence the code below.  If you pass length for n bins, then you'll get
    # garbage for the last bin's upper edge and things go nuts in ROOT.
    dimensionality = len(expressions)
    count = len(data)
    if dimensionality == 1:
        # Create a one-dimensional histogram
        result = TH1F(name, title, len(edges[0]) - 1, edges[0])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], weights)
    elif dimensionality == 2:
        # Create a two-dimensional histogram
        result = TH2F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], samples[1], weights)
    elif dimensionality == 3:
        # Create a three-dimensional histogram
        result = TH3F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1],
                      len(edges[2]) - 1, edges[2])

        # HACK: TH3 doesn't have a FillN method, so we have to do things
        # the slow way.
        # TODO: We may want to put a warning about this slowness
        # NOTE: Use an identity check here - when a weight expression was
        # given, weights is a NumPy array, and comparing it to nullptr
        # with == yields an element-wise result whose truth value is
        # ambiguous (raising a ValueError)
        if weights is nullptr:
            weights = numpy.ones(count, dtype = numpy.float64)
        for x, y, z, w in zip(samples[0], samples[1], samples[2], weights):
            result.Fill(x, y, z, w)
    else:
        raise ValueError('ROOT can only histograms 1 - 3 dimensions')

    # All done
    return result
def _histogram(process, region, expressions, binnings, load_hints=None):
    """Generates a ROOT histogram of a distribution of a process in a region.

    Args:
        process: The process whose events should be histogrammed
        region: The region whose weighting/selection should be applied
        expressions: A tuple of expression strings, one per histogram axis
        binnings: A tuple of Binning instances, one per histogram axis
        load_hints: If provided, this argument will hint to _histogram that
            it should load additional properties when loading data and that
            it should use the _caching_loader.  This facilitates cached
            loading of data across multiple calls to _histogram with the
            same process.  This is particularly useful for parallelized
            histogramming, where the jobs are grouped by process.

    Returns:
        A ROOT histogram, of the TH1F, TH2F, or TH3F variety.

    Raises:
        ValueError: If more than 3 expressions are provided, since ROOT
            only supports 1-3 dimensional histograms.
    """
    # Compute weighted selection
    selection, weight = region.selection_weight()

    # Expand binnings to edge lists
    edges = tuple(b.edges() for b in binnings)

    # Load data
    if load_hints is not None:
        # If load_hints have been provided, just use those with the
        # _caching_loader
        data = _caching_loader(process, load_hints)
    else:
        # Otherwise manually create the set of necessary properties
        # NOTE: All we need to do are region and expression properties -
        # patch properties are handled internally by the process
        required_properties = set()

        # Add those properties necessary to evaluate region selection/weight
        required_properties.update(properties(selection))
        required_properties.update(properties(weight))

        # Add in those properties necessary to evaluate expressions
        required_properties.update(*(properties(e) for e in expressions))

        # Load data
        data = process.load(required_properties)

    # Apply selection if specified
    if selection != '':
        data = data[data.eval(normalized(selection))]

    # Evaluate each variable expression, converting the resultant Pandas
    # Series to a NumPy array
    # HACK: TH1::FillN only supports 64-bit floating point values, so
    # convert things.  Would be nice to find a better approach.
    samples = tuple(data.eval(normalized(e)).values.astype(numpy.float64)
                    for e in expressions)

    # Evaluate weights, converting the resultant Pandas Series to a NumPy
    # array
    # HACK: TH1::FillN only supports 64-bit floating point values, so
    # convert things.  Would be nice to find a better approach.
    if weight != '':
        weights = data.eval(normalized(weight)).values.astype(numpy.float64)
    else:
        weights = nullptr

    # Create a unique name and title for the histogram
    name = title = uuid4().hex

    # Create a histogram based on dimensionality
    # NOTE: When specifying explicit bin edges, you aren't passing a length
    # argument, you are passing an nbins argument, which is length - 1,
    # hence the code below.  If you pass length for n bins, then you'll get
    # garbage for the last bin's upper edge and things go nuts in ROOT.
    dimensionality = len(expressions)
    count = len(data)
    if dimensionality == 1:
        # Create a one-dimensional histogram
        result = TH1F(name, title, len(edges[0]) - 1, edges[0])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], weights)
    elif dimensionality == 2:
        # Create a two-dimensional histogram
        result = TH2F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], samples[1], weights)
    elif dimensionality == 3:
        # Create a three-dimensional histogram
        result = TH3F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1],
                      len(edges[2]) - 1, edges[2])

        # HACK: TH3 doesn't have a FillN method, so we have to do things
        # the slow way.
        # TODO: We may want to put a warning about this slowness
        # NOTE: Use an identity check here - when a weight expression was
        # given, weights is a NumPy array, and comparing it to nullptr
        # with == yields an element-wise result whose truth value is
        # ambiguous (raising a ValueError)
        if weights is nullptr:
            weights = numpy.ones(count, dtype=numpy.float64)
        for x, y, z, w in zip(samples[0], samples[1], samples[2], weights):
            result.Fill(x, y, z, w)
    else:
        raise ValueError('ROOT can only histograms 1 - 3 dimensions')

    # All done
    return result
def test_normalize(self):
    # Verify that a logical expression's operators (!, &&, ||) are
    # rewritten into their Pandas-eval equivalents (~, &, |)
    expected = "~x & y | z"
    self.assertEqual(normalized("!x && y || z"), expected)
def test_normalize(self):
    # Verify that operator normalization maps !, &&, and || onto the
    # Pandas-eval operators ~, &, and |
    result = normalized('!x && y || z')
    self.assertEqual(result, '~x & y | z')