示例#1
0
def _get_mean_and_median(hist: Hist) -> Tuple[float, float]:
    """ Extract the mean and the median of a ROOT histogram.

    Note:
        Both quantities are bin values weighted by the bin content, so they are delegated
        to ROOT rather than being calculated by hand.

    Args:
        hist: Histogram from which the values will be extracted.
    Returns:
        mean, median of the histogram.
    """
    # The median is requested as the 0.5 quantile. The histogram writes the result into the
    # ``median_buffer`` that is passed to it, which is why ctypes doubles are used here.
    # See: https://root-forum.cern.ch/t/median-of-histogram/7626/5
    median_buffer = ctypes.c_double(0)
    probability = ctypes.c_double(0.5)
    # Computing the integral first is a precaution (it is apparently needed to be safe).
    hist.ComputeIntegral()
    hist.GetQuantiles(1, median_buffer, probability)

    return (hist.GetMean(), median_buffer.value)
示例#2
0
    def _project_THn(self, hist: Hist) -> Any:
        """ Perform the actual THn -> THn or TH1 projection.

        This projection could be to 1D, 2D, 3D, or ND. Projections onto up to 3 axes are
        performed with ``hist.Projection(...)``, while projections onto more than 3 axes are
        performed with ``hist.ProjectionND(...)``.

        Args:
            hist (ROOT.THnBase): Histogram from which the projections should be performed.
        Returns:
            ROOT.THnBase or ROOT.TH1: The projected histogram.
        """
        # Imported locally so that the ND branch does not rely on a module level import.
        import array

        # The value of each axis type is what is handed to ROOT to identify the axis.
        projection_axes = [
            axis.axis_type.value for axis in self.projection_axes
        ]

        # Handle ROOT THnBase quirk...
        # 2D projection are called as (y, x, options), so we should reverse the order so it performs
        # as expected
        if len(projection_axes) == 2:
            # Reverses in place
            projection_axes.reverse()

        # "E" ensures that errors will be calculated for the projection.
        option = "E"
        args = projection_axes + [option]
        logger.debug(f"hist: {hist.GetName()} args: {args}")

        if len(projection_axes) > 3:
            # Project into a THnBase object.
            # ``ProjectionND(Int_t ndim, const Int_t* dim, Option_t* option)`` takes the number of
            # axes followed by a C array of axis numbers, so the axes must not be unpacked into
            # separate positional arguments (no overload accepts that form).
            return hist.ProjectionND(len(projection_axes),
                                     array.array("i", projection_axes),
                                     option)

        # Project a TH1 derived object. Here the axes are passed as individual arguments,
        # followed by the options.
        return hist.Projection(*args)
示例#3
0
    def _project_TH3(self, hist: Hist) -> Any:
        """ Project a TH3 onto one or two of its axes.

        Args:
            hist (ROOT.TH3): Histogram from which the projections should be performed.
        Returns:
            ROOT.TH1: The projected histogram.
        Raises:
            ValueError: If the number of projection axes is not 1 or 2, or if the first letter
                of an axis type name is not "x", "y", or "z".
        """
        # Only 1D and 2D projections are possible.
        n_axes = len(self.projection_axes)
        if not 1 <= n_axes <= 2:
            raise ValueError(n_axes, "Invalid number of axes")

        # Build the projection option from the first letter of each axis type name. For example,
        # an ``x_axis`` followed by a ``y_axis`` yields ["x", "y"].
        # NOTE: This relies on the naming of the enumerated values, which isn't terribly safe, so
        #       each letter is validated before it is accepted.
        allowed_letters = ("x", "y", "z")
        letters = []
        for projection_axis in self.projection_axes:
            proj_axis_name = projection_axis.axis_type.name[:1]
            if proj_axis_name not in allowed_letters:
                raise ValueError(
                    f"Projection axis name {proj_axis_name} is not 'x', 'y', or 'z'. Please check your configuration."
                )
            letters.append(proj_axis_name)

        # Handle ROOT Project3D quirk...
        # 2D projections are specified as (y, x), so the order is flipped to get the expected result.
        # NOTE: This isn't well documented in TH3. It is instead described in THnBase.Projection(...)
        if n_axes == 2:
            letters.reverse()
        projection_axis_name = "".join(letters)

        logger.info(
            f"Projecting onto axes \"{projection_axis_name}\" from hist {hist.GetName()}"
        )
        return hist.Project3D(projection_axis_name)
示例#4
0
def _remove_outliers_from_hist(
        hist: Hist, outliers_start_index: int,
        outliers_removal_axis: OutliersRemovalAxis) -> None:
    """Remove outliers from a given histogram.

    Every cell whose bin number along ``outliers_removal_axis`` is greater than or equal to
    ``outliers_start_index`` has its content and error set to 0.

    Args:
        hist: Histogram to check for outliers.
        outliers_start_index: Index in the truth axis where outliers begin. If it is not
            positive, nothing is removed.
        outliers_removal_axis: Axis along which outliers removal will be performed. Usually
            the particle level axis.
    Returns:
        None. The histogram is modified in place.
    Raises:
        KeyError: If outliers are to be removed and ``outliers_removal_axis`` is not one of the
            x, y, or z ``TH1AxisType`` values.
    """
    # Use on TH1, TH2, and TH3 since we don't start removing immediately, but instead only after the limit
    if outliers_start_index <= 0:
        logger.info(f"Hist {hist.GetName()} did not have any outliers to cut")
        return

    # Buffers which are filled with the per-axis bin numbers of each global bin.
    x = ctypes.c_int(0)
    y = ctypes.c_int(0)
    z = ctypes.c_int(0)
    # Maps each axis to the buffer holding its bin number.
    outliers_removal_axis_values: Dict[OutliersRemovalAxis, ctypes.c_int] = {
        projectors.TH1AxisType.x_axis: x,
        projectors.TH1AxisType.y_axis: y,
        projectors.TH1AxisType.z_axis: z,
    }
    # The selected axis does not change between cells, so look the buffer up once. The buffer
    # object itself is refilled by each ``GetBinXYZ`` call below.
    removal_axis_bin = outliers_removal_axis_values[outliers_removal_axis]

    for index in range(hist.GetNcells()):
        # Get the bin x, y, z from the global bin
        hist.GetBinXYZ(index, x, y, z)
        # Watch out for any problems
        bin_content = hist.GetBinContent(index)
        bin_error = hist.GetBinError(index)
        if bin_content < bin_error:
            logger.warning(
                f"Bin content < error. Name: {hist.GetName()}, Bin content: {bin_content}, Bin error: {bin_error}, index: {index}, ({x.value}, {y.value})"
            )
        if removal_axis_bin.value >= outliers_start_index:
            hist.SetBinContent(index, 0)
            hist.SetBinError(index, 0)
示例#5
0
    def axis_func(hist: Hist) -> Axis:
        """ Retrieve the axis selected by ``axis_type`` from a given hist.

        Note:
            ``axis_type`` is not a parameter of this function. It is a free variable which is
            taken from the enclosing scope. It is either an enumeration whose numerical value
            is the axis number (for a THnBase), or that value itself.

        Args:
            hist: Histogram from which the selected axis should be retrieved.
        Returns:
            ROOT.TAxis: The axis associated with the ``HistAxisRange`` object.
        Raises:
            KeyError: If the hist is TH1 derived and the axis value is not that of the x, y, or
                z ``TH1AxisType``.
        """
        # Use the ``value`` attribute when there is one (an enum), and otherwise fall back to the
        # object itself (for example, an int). Attribute access is used rather than a type check
        # to protect against type changes (say in the enum).
        hist_axis_type = getattr(axis_type, "value", axis_type)

        # THnBase defines ProjectionND and Projection, so they are used as proxies for the type.
        looks_like_thn = hasattr(hist, "ProjectionND") and hasattr(hist, "Projection")
        if looks_like_thn:
            return hist.GetAxis(hist_axis_type)

        # If it's not a THn, then it must be TH1 derived, where each axis has a dedicated getter.
        axis_getters = {
            TH1AxisType.x_axis.value: hist.GetXaxis,
            TH1AxisType.y_axis.value: hist.GetYaxis,
            TH1AxisType.z_axis.value: hist.GetZaxis,
        }

        # The lookup and the call are kept separate to clarify any possible errors.
        get_axis = axis_getters[hist_axis_type]
        return get_axis()
示例#6
0
def get_array_from_hist2D(hist: Hist,
                          set_zero_to_NaN: bool = True,
                          return_bin_edges: bool = False
                          ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """ Extract x, y, and bin values from a 2D ROOT histogram.

    Converts the histogram into a numpy array, and suitably processes it for a surface plot
    by removing 0s (which can cause problems when taking logs), and returning a set of (x, y) mesh
    values utilizing either the bin edges or bin centers.

    Note:
        This is a different format than the 1D version!

    Args:
        hist (ROOT.TH2): Histogram to be converted.
        set_zero_to_NaN: If true, set 0 in the array to NaN. Useful with matplotlib so that it will
            ignore the values when plotting. See comments in this function for more details. Default: True.
        return_bin_edges: Return x and y using bin edges instead of bin centers.
    Returns:
        Contains (x values, y values, numpy array of hist data) where (x, y) are values on a
            grid (from np.meshgrid) using the selected bin values. The hist data is indexed as
            ``[x_index][y_index]``.
    """
    x_axis = hist.GetXaxis()
    y_axis = hist.GetYaxis()
    n_x_bins = x_axis.GetNbins()
    n_y_bins = y_axis.GetNbins()

    # Collect the contents of every cell which is neither underflow nor overflow, in order of
    # increasing global bin number.
    # The dtype is fixed to float so that NaN can always be stored in the array below.
    hist_array = np.array([
        hist.GetBinContent(global_bin)
        for global_bin in range(1, hist.GetNcells())
        if not hist.IsBinUnderflow(global_bin)
        and not hist.IsBinOverflow(global_bin)
    ], dtype=np.float64)
    # The contents are linear, so shape them into ``n_y_bins`` rows and ``n_x_bins`` columns.
    # NOTE: This can be somewhat confusing (one may naively expect to specify x first).
    hist_array = hist_array.reshape((n_y_bins, n_x_bins))
    # Transpose so that ``hist_array[1][0]`` gives the 2nd x value (x_index = 1) and the 1st y
    # value (y_index = 0). This is the same convention as used by root_numpy.
    hist_array = hist_array.T
    # Set all 0s to nan to get similar behavior to ROOT. In ROOT, it will basically ignore 0s. This is
    # especially important for log plots. Matplotlib doesn't handle 0s as well, since it attempts to
    # plot them and then will throw exceptions when the log is taken.
    # NOTE: This requires a few special functions later which ignore nan when calculating min and max.
    if set_zero_to_NaN:
        hist_array[hist_array == 0] = np.nan

    if return_bin_edges:
        # Use the edges reported for each axis directly. Regenerating them with ``np.arange`` from
        # the width of the first bin is only valid for uniform binning, and padding the stop value
        # with a fixed epsilon yields an extra edge once the bin width is comparable to that epsilon.
        # NOTE: This assumes that ``get_bin_edges_from_axis`` returns the full sequence of bin
        #       edges of the axis (it is defined elsewhere).
        x_range = np.array(get_bin_edges_from_axis(x_axis))
        y_range = np.array(get_bin_edges_from_axis(y_axis))
    else:
        # We want an array of bin centers
        x_range = np.array(
            [x_axis.GetBinCenter(i) for i in range(1, n_x_bins + 1)])
        y_range = np.array(
            [y_axis.GetBinCenter(i) for i in range(1, n_y_bins + 1)])

    X, Y = np.meshgrid(x_range, y_range)

    return (X, Y, hist_array)
示例#7
0
    def _from_th1(
        hist: Hist
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
        """ Extract the contents of a TH1 histogram as numpy arrays.

        Note:
            Underflow and overflow bins are excluded! Sumw2 is enabled on the passed histogram
            if it was not already stored.

        Args:
            hist (ROOT.TH1): Input histogram.
        Returns:
            tuple: (bin_edges, y, errors_squared, metadata) where bin_edges are the bin edges of
                the x axis, y is the bin values, errors_squared are the squared bin errors, and
                metadata contains the stats created by ``_create_stats_dict_from_values``. The
                metadata is left empty for a TProfile.
        Raises:
            ValueError: If the hist is not a TProfile and the first sumw2 value does not match the
                square of the error of the first bin.
        """
        def per_bin(getter: Any) -> np.ndarray:
            # Evaluate ``getter`` for bins 1 through the number of x bins, which skips the
            # underflow (0) and overflow (number of bins + 1) bins.
            return np.array([
                getter(i) for i in range(1,
                                         hist.GetXaxis().GetNbins() + 1)
            ])

        # Enable sumw2 if it's not already calculated
        if hist.GetSumw2N() == 0:
            hist.Sumw2(True)

        bin_edges = get_bin_edges_from_axis(hist.GetXaxis())
        # NOTE: The y value and bin error are stored with the hist, not the axis.
        y = per_bin(hist.GetBinContent)
        # The first and last sumw2 entries are dropped to exclude the under/overflow bins.
        errors = np.array(hist.GetSumw2())[1:-1]
        metadata: Dict[str, Any] = {}

        # ``BuildOptions`` is used as the signature of a TProfile. In that case, the Sumw2() values
        # are not the anticipated errors, so they are replaced by the squared bin errors.
        if hasattr(hist, "BuildOptions"):
            errors = per_bin(hist.GetBinError)**2
            return (bin_edges, y, errors, metadata)

        # Sanity check. If they don't match, something odd has almost certainly occurred.
        if not np.isclose(errors[0], hist.GetBinError(1)**2):
            raise ValueError(
                "Sumw2 errors don't seem to represent bin errors!")

        # Retrieve the stats and store them in the metadata.
        # They are useful for calculating histogram properties (mean, variance, etc).
        # ``GetStats`` fills the buffer in place with four floats:
        # [1]: total_sum_w: Sum of weights (equal to np.sum(y) if unscaled)
        # [2]: total_sum_w2: Sum of weights squared (equal to np.sum(errors_squared) if unscaled)
        # [3]: total_sum_wx: Sum of w*x
        # [4]: total_sum_wx2: Sum of w*x*x
        stats = np.zeros(4, dtype=np.float64)
        hist.GetStats(np.ctypeslib.as_ctypes(stats))
        metadata.update(_create_stats_dict_from_values(*stats))

        return (bin_edges, y, errors, metadata)