Пример #1
0
def _count(process, region):
    """Returns the (optionally weighted) number of events of a process in a
    region.

    Args:
        process: The process whose events should be counted
        region: The region providing the selection and weight expressions

    Returns:
        The sum of the evaluated weight expression over the selected events
        if the region specifies a weight, otherwise the number of selected
        events.
    """
    # Ask the region for its selection and weight expressions
    selection, weight = region.selection_weight()

    # Only the properties referenced by the two expressions need to be loaded
    needed = set(properties(selection)) | set(properties(weight))
    events = process.load(needed)

    # Restrict to the selected events (an empty selection keeps everything)
    if selection != '':
        mask = events.eval(normalized(selection))
        events = events[mask]

    # Weighted count: sum the per-event weights
    if weight != '':
        per_event_weights = events.eval(normalized(weight))
        return per_event_weights.sum()

    # Unweighted count: just the number of surviving events
    return len(events)
Пример #2
0
    def __init__(self, selection):
        """Creates a new Filter from a selection expression.

        Args:
            selection: The selection expression to apply to the process data;
                it is passed through normalized() before being stored
        """
        # Only the normalized form of the expression is kept
        normalized_selection = normalized(selection)
        self._selection = normalized_selection
Пример #3
0
    def __init__(self, selection):
        """Creates a new Filter from a selection expression.

        Args:
            selection: The selection expression to apply to the process data;
                it is passed through normalized() before being stored
        """
        # Only the normalized form of the expression is kept
        normalized_selection = normalized(selection)
        self._selection = normalized_selection
Пример #4
0
    def __init__(self, weight):
        """Creates a new Reweighted instance from a weight expression.

        Args:
            weight: The weight expression to incorporate into the region; it
                is passed through normalized() before being stored
        """
        # Only the normalized form of the expression is kept
        normalized_weight = normalized(weight)
        self._weight = normalized_weight
Пример #5
0
    def __init__(self, weight):
        """Creates a new Reweighted instance from a weight expression.

        Args:
            weight: The weight expression to incorporate into the region; it
                is passed through normalized() before being stored
        """
        # Only the normalized form of the expression is kept
        normalized_weight = normalized(weight)
        self._weight = normalized_weight
Пример #6
0
def _histogram(process, region, expressions, binnings, load_hints=None):
    """Generates a ROOT histogram of a distribution of a process in a region.

    Args:
        process: The process whose events should be histogrammed
        region: The region whose weighting/selection should be applied
        expressions: A tuple of expression strings, one per histogram axis
        binnings: A tuple of Binning instances, one per histogram axis
        load_hints: If provided, this argument will hint to _histogram that it
            should load additional properties when loading data and that it
            should use the _caching_loader.  This facilitates cached loading of
            data across multiple calls to _histogram with the same process.
            This is particularly useful for parallelized histogramming, where
            the jobs are grouped by process.

    Returns:
        A ROOT histogram, of the TH1F, TH2F, or TH3F variety.

    Raises:
        ValueError: If the number of expressions is not 1, 2, or 3.
    """
    # Compute weighted selection
    selection, weight = region.selection_weight()

    # Expand binnings to edge lists
    edges = tuple(b.edges() for b in binnings)

    # Load data
    if load_hints is not None:
        # If load_hints have been provided, just use those with the
        # _caching_loader
        data = _caching_loader(process, load_hints)
    else:
        # Otherwise manually create the set of necessary properties
        # NOTE: All we need to do are region and expression properties - patch
        # properties are handled internally by the process
        required_properties = set()

        # Add those properties necessary to evaluate region selection/weight
        required_properties.update(properties(selection))
        required_properties.update(properties(weight))

        # Add in those properties necessary to evaluate expressions
        required_properties.update(*(properties(e) for e in expressions))

        # Load data
        data = process.load(required_properties)

    # Apply selection if specified
    if selection != '':
        data = data[data.eval(normalized(selection))]

    # Evaluate each variable expression, converting the resultant Pandas Series
    # to a NumPy array
    # HACK: TH1::FillN only supports 64-bit floating point values, so convert
    # things.  Would be nice to find a better approach.
    samples = tuple(data.eval(normalized(e)).values.astype(numpy.float64)
                    for e in expressions)

    # Evaluate weights, converting the resultant Pandas Series to a NumPy array
    # HACK: TH1::FillN only supports 64-bit floating point values, so convert
    # things.  Would be nice to find a better approach.
    if weight != '':
        weights = data.eval(normalized(weight)).values.astype(numpy.float64)
    else:
        # No weight expression: pass a null pointer to FillN
        weights = nullptr

    # Create a unique name and title for the histogram
    name = title = uuid4().hex

    # Create a histogram based on dimensionality
    # NOTE: When specifying explicit bin edges, you aren't passing a length
    # argument, you are passing an nbins argument, which is length - 1, hence
    # the code below.  If you pass length for n bins, then you'll get garbage
    # for the last bin's upper edge and things go nuts in ROOT.
    dimensionality = len(expressions)
    count = len(data)
    if dimensionality == 1:
        # Create a one-dimensional histogram
        result = TH1F(name, title,
                      len(edges[0]) - 1, edges[0])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], weights)
    elif dimensionality == 2:
        # Create a two-dimensional histogram
        result = TH2F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], samples[1], weights)
    elif dimensionality == 3:
        # Create a three-dimensional histogram
        result = TH3F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1],
                      len(edges[2]) - 1, edges[2])

        # HACK: TH3 doesn't have a FillN method, so we have to do things the
        # slow way.
        # TODO: We may want to put a warning about this slowness
        # NOTE: Use an identity test here.  When a weight expression was
        # given, weights is a NumPy array and `weights == nullptr` would be an
        # element-wise comparison whose truth value is ambiguous.
        if weights is nullptr:
            weights = numpy.ones(count, dtype=numpy.float64)
        for x, y, z, w in zip(samples[0], samples[1], samples[2], weights):
            result.Fill(x, y, z, w)
    else:
        raise ValueError('ROOT can only histograms 1 - 3 dimensions')

    # All done
    return result
Пример #7
0
def _histogram(process, region, expressions, binnings, load_hints=None):
    """Generates a ROOT histogram of a distribution of a process in a region.

    Args:
        process: The process whose events should be histogrammed
        region: The region whose weighting/selection should be applied
        expressions: A tuple of expression strings, one per histogram axis
        binnings: A tuple of Binning instances, one per histogram axis
        load_hints: If provided, this argument will hint to _histogram that it
            should load additional properties when loading data and that it
            should use the _caching_loader.  This facilitates cached loading of
            data across multiple calls to _histogram with the same process.
            This is particularly useful for parallelized histogramming, where
            the jobs are grouped by process.

    Returns:
        A ROOT histogram, of the TH1F, TH2F, or TH3F variety.

    Raises:
        ValueError: If the number of expressions is not 1, 2, or 3.
    """
    # Compute weighted selection
    selection, weight = region.selection_weight()

    # Expand binnings to edge lists
    edges = tuple(b.edges() for b in binnings)

    # Load data
    if load_hints is not None:
        # If load_hints have been provided, just use those with the
        # _caching_loader
        data = _caching_loader(process, load_hints)
    else:
        # Otherwise manually create the set of necessary properties
        # NOTE: All we need to do are region and expression properties - patch
        # properties are handled internally by the process
        required_properties = set()

        # Add those properties necessary to evaluate region selection/weight
        required_properties.update(properties(selection))
        required_properties.update(properties(weight))

        # Add in those properties necessary to evaluate expressions
        required_properties.update(*(properties(e) for e in expressions))

        # Load data
        data = process.load(required_properties)

    # Apply selection if specified
    if selection != '':
        data = data[data.eval(normalized(selection))]

    # Evaluate each variable expression, converting the resultant Pandas Series
    # to a NumPy array
    # HACK: TH1::FillN only supports 64-bit floating point values, so convert
    # things.  Would be nice to find a better approach.
    samples = tuple(data.eval(normalized(e)).values.astype(numpy.float64)
                    for e in expressions)

    # Evaluate weights, converting the resultant Pandas Series to a NumPy array
    # HACK: TH1::FillN only supports 64-bit floating point values, so convert
    # things.  Would be nice to find a better approach.
    if weight != '':
        weights = data.eval(normalized(weight)).values.astype(numpy.float64)
    else:
        # No weight expression: pass a null pointer to FillN
        weights = nullptr

    # Create a unique name and title for the histogram
    name = title = uuid4().hex

    # Create a histogram based on dimensionality
    # NOTE: When specifying explicit bin edges, you aren't passing a length
    # argument, you are passing an nbins argument, which is length - 1, hence
    # the code below.  If you pass length for n bins, then you'll get garbage
    # for the last bin's upper edge and things go nuts in ROOT.
    dimensionality = len(expressions)
    count = len(data)
    if dimensionality == 1:
        # Create a one-dimensional histogram
        result = TH1F(name, title, len(edges[0]) - 1, edges[0])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], weights)
    elif dimensionality == 2:
        # Create a two-dimensional histogram
        result = TH2F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1])

        # Fill the histogram
        # HACK: TH1::FillN will die if N == 0
        if count > 0:
            result.FillN(count, samples[0], samples[1], weights)
    elif dimensionality == 3:
        # Create a three-dimensional histogram
        result = TH3F(name, title,
                      len(edges[0]) - 1, edges[0],
                      len(edges[1]) - 1, edges[1],
                      len(edges[2]) - 1, edges[2])

        # HACK: TH3 doesn't have a FillN method, so we have to do things the
        # slow way.
        # TODO: We may want to put a warning about this slowness
        # NOTE: Use an identity test here.  When a weight expression was
        # given, weights is a NumPy array and `weights == nullptr` would be an
        # element-wise comparison whose truth value is ambiguous.
        if weights is nullptr:
            weights = numpy.ones(count, dtype=numpy.float64)
        for x, y, z, w in zip(samples[0], samples[1], samples[2], weights):
            result.Fill(x, y, z, w)
    else:
        raise ValueError('ROOT can only histograms 1 - 3 dimensions')

    # All done
    return result
Пример #8
0
 def test_normalize(self):
     # Feed an expression using '!', '&&' and '||' through normalized() and
     # compare against the expected '~', '&' and '|' form
     raw_expression = "!x && y || z"
     expected = "~x & y | z"
     self.assertEqual(normalized(raw_expression), expected)
Пример #9
0
 def test_normalize(self):
     # Feed an expression using '!', '&&' and '||' through normalized() and
     # compare against the expected '~', '&' and '|' form
     raw_expression = '!x && y || z'
     expected = '~x & y | z'
     self.assertEqual(normalized(raw_expression), expected)