def _get_mean_and_median(hist: Hist) -> Tuple[float, float]:
    """ Extract the mean and the median of a ROOT histogram.

    Note:
        Both quantities are bin values weighted by the bin content, which is why the
        calculation is delegated to ROOT instead of being redone by hand.

    Args:
        hist: Histogram from which the values will be extracted.
    Returns:
        (mean, median) of the histogram.
    """
    # The median is requested as the single quantile at probability 0.5. The result is read
    # back from the ``median`` ctypes double after the call.
    # See: https://root-forum.cern.ch/t/median-of-histogram/7626/5
    probability = ctypes.c_double(0.5)
    median = ctypes.c_double(0)

    # Recompute the integral before asking for quantiles (kept from the original as a
    # precaution; whether it is strictly required is not established here).
    hist.ComputeIntegral()
    hist.GetQuantiles(1, median, probability)

    return (hist.GetMean(), median.value)
def _project_THn(self, hist: Hist) -> Any:
    """ Perform the actual THn -> THn or TH1 projection.

    This projection could be to 1D, 2D, 3D, or ND. Up to three axes, the result comes from
    ``THnBase.Projection(...)``; for more than three axes it comes from
    ``THnBase.ProjectionND(...)``.

    Args:
        hist (ROOT.THnBase): Histogram from which the projections should be performed.
    Returns:
        ROOT.THnBase or ROOT.TH1: The projected histogram.
    """
    # Local import: only the ND branch needs a C-style integer array.
    import array

    # The axes to project onto, expressed as the integer values of the axis type enum.
    projection_axes = [axis.axis_type.value for axis in self.projection_axes]
    # Always pass "E" to ensure that errors will be calculated.
    option = "E"

    n_dim = len(projection_axes)
    if n_dim > 3:
        # ``ProjectionND`` has the signature ``(ndim, const Int_t* dim, option)``, so the axes
        # must be passed as a single integer array rather than as separate positional arguments.
        logger.debug(f"hist: {hist.GetName()} args: {[n_dim, projection_axes, option]}")
        return hist.ProjectionND(n_dim, array.array("i", projection_axes), option)

    # Handle ROOT THnBase quirk...
    # 2D projections are called as (y, x, options), so we reverse the order so it performs
    # as expected.
    if n_dim == 2:
        projection_axes.reverse()

    # ``Projection`` takes the axes as positional arguments, followed by the options.
    args = projection_axes + [option]
    logger.debug(f"hist: {hist.GetName()} args: {args}")
    return hist.Projection(*args)
def _project_TH3(self, hist: Hist) -> Any:
    """ Perform the actual TH3 -> TH1 projection.

    This projection could be to 1D or 2D. The projection is requested from ``Project3D`` via a
    string built from the first letter of each projection axis' enum name (for example "x" or
    "yx").

    Args:
        hist (ROOT.TH3): Histogram from which the projections should be performed.
    Returns:
        ROOT.TH1: The projected histogram.
    Raises:
        ValueError: If the number of projection axes is not 1 or 2, or if an axis name does not
            start with 'x', 'y', or 'z'.
    """
    # Only 1D and 2D projections are possible from a TH3.
    n_axes = len(self.projection_axes)
    if not 1 <= n_axes <= 2:
        raise ValueError(n_axes, "Invalid number of axes")

    # Collect one letter per axis.
    # NOTE: This depends on the names of the enumerated values (``x_axis`` -> "x", etc), so each
    #       letter is validated explicitly before it is used.
    axis_letters = []
    for axis in self.projection_axes:
        letter = axis.axis_type.name[:1]
        if letter not in ("x", "y", "z"):
            raise ValueError(
                f"Projection axis name {letter} is not 'x', 'y', or 'z'. Please check your configuration."
            )
        axis_letters.append(letter)

    # Handle ROOT Project3D quirk...
    # 2D projections are specified as (y, x), so the order is reversed so it performs as expected.
    # NOTE: This isn't well documented in TH3. It is instead described in THnBase.Projection(...)
    if n_axes == 2:
        axis_letters.reverse()
    projection_option = "".join(axis_letters)

    logger.info(f'Projecting onto axes "{projection_option}" from hist {hist.GetName()}')
    return hist.Project3D(projection_option)
def _remove_outliers_from_hist(hist: Hist, outliers_start_index: int,
                               outliers_removal_axis: OutliersRemovalAxis) -> None:
    """ Remove outliers from a given histogram.

    Every cell whose bin number along ``outliers_removal_axis`` is greater than or equal to
    ``outliers_start_index`` has its content and error set to 0. Works on TH1, TH2, and TH3
    since the cut is expressed through the per-axis bin numbers of each global bin.

    Args:
        hist: Histogram to check for outliers.
        outliers_start_index: Index in the truth axis where outliers begin. If it is not
            positive, nothing is removed.
        outliers_removal_axis: Axis along which outliers removal will be performed. Usually
            the particle level axis.
    Returns:
        None. The histogram is modified in place.
    """
    # Nothing to do if no starting point for the outliers was provided.
    if outliers_start_index <= 0:
        logger.info(f"Hist {hist.GetName()} did not have any outliers to cut")
        return

    # Per-axis bin numbers, passed to ``GetBinXYZ`` for each global bin and read back via ``.value``.
    bin_x, bin_y, bin_z = ctypes.c_int(0), ctypes.c_int(0), ctypes.c_int(0)
    # Map from the axis type to the bin number holder for that axis.
    bin_number_by_axis: Dict[OutliersRemovalAxis, ctypes.c_int] = {
        projectors.TH1AxisType.x_axis: bin_x,
        projectors.TH1AxisType.y_axis: bin_y,
        projectors.TH1AxisType.z_axis: bin_z,
    }

    for global_bin in range(hist.GetNcells()):
        # Translate the global bin into its (x, y, z) bin numbers.
        hist.GetBinXYZ(global_bin, bin_x, bin_y, bin_z)

        # Watch out for any problems
        content = hist.GetBinContent(global_bin)
        error = hist.GetBinError(global_bin)
        if content < error:
            logger.warning(
                f"Bin content < error. Name: {hist.GetName()}, Bin content: {content}, Bin error: {error}, index: {global_bin}, ({bin_x.value}, {bin_y.value})"
            )

        # Zero out anything at or beyond the cut along the selected axis.
        beyond_cut = bin_number_by_axis[outliers_removal_axis].value >= outliers_start_index
        if beyond_cut:
            hist.SetBinContent(global_bin, 0)
            hist.SetBinError(global_bin, 0)
def axis_func(hist: Hist) -> Axis:
    """ Retrieve the axis associated with the ``HistAxisRange`` object for a given hist.

    Note:
        ``axis_type`` is not a parameter of this function. It is taken from the surrounding
        scope, and may either be an enumeration (whose ``value`` is the axis number) or a
        plain integer.

    Args:
        hist: Histogram from which the selected axis should be retrieved.
    Returns:
        ROOT.TAxis: The axis associated with the ``HistAxisRange`` object.
    """
    # Prefer the enum value, but fall back to the object itself if there is no ``value``
    # attribute (for example, if we were handed an int). Looking up the attribute rather than
    # checking for a particular type protects against type changes (say in the enum).
    hist_axis_type = getattr(axis_type, "value", axis_type)

    # THnBase defines both ProjectionND and Projection, so their presence is used as a proxy
    # for identifying a THn. In that case, the axis is accessed by number.
    is_thn = all(hasattr(hist, name) for name in ("ProjectionND", "Projection"))
    if is_thn:
        return hist.GetAxis(hist_axis_type)

    # If it's not a THn, then it must be TH1 derived, so select the dedicated axis accessor.
    th1_axis_getters = {
        TH1AxisType.x_axis.value: hist.GetXaxis,
        TH1AxisType.y_axis.value: hist.GetYaxis,
        TH1AxisType.z_axis.value: hist.GetZaxis,
    }
    # The lookup and the call are kept separate to clarify any possible errors.
    get_axis = th1_axis_getters[hist_axis_type]
    return get_axis()
def get_array_from_hist2D(hist: Hist, set_zero_to_NaN: bool = True,
                          return_bin_edges: bool = False) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """ Extract x, y, and bin values from a 2D ROOT histogram.

    Converts the histogram into a numpy array, and suitably processes it for a surface plot
    by removing 0s (which can cause problems when taking logs), and returning a set of (x, y) mesh
    values utilizing either the bin edges or bin centers.

    Note:
        This is a different format than the 1D version!

    Note:
        The returned histogram data is indexed as ``hist_array[x_index][y_index]`` (shape
        ``(n_x_bins, n_y_bins)``), while the mesh arrays come straight from ``np.meshgrid`` and
        therefore have one row per y value and one column per x value.

    Args:
        hist (ROOT.TH2): Histogram to be converted.
        set_zero_to_NaN: If true, set 0 in the array to NaN. Useful with matplotlib so that it will
            ignore the values when plotting. See comments in this function for more details. Default: True.
        return_bin_edges: Return x and y using bin edges instead of bin centers.
    Returns:
        Contains (x values, y values, numpy array of hist data) where (x, y) are values on a
            grid (from np.meshgrid) using the selected bin values.
    """
    x_axis = hist.GetXaxis()
    y_axis = hist.GetYaxis()

    # Collect the content of every in-range bin in global bin order, skipping under/overflow.
    # The dtype is forced to float so that assigning NaN below is always valid.
    hist_array = np.array(
        [
            hist.GetBinContent(global_bin) for global_bin in range(1, hist.GetNcells())
            if not hist.IsBinUnderflow(global_bin) and not hist.IsBinOverflow(global_bin)
        ],
        dtype=np.float64,
    )
    # The values were linear, so shape them into ``GetYaxis().GetNbins()`` rows and
    # ``GetXaxis().GetNbins()`` columns (which can be somewhat confusing - one might naively
    # expect to specify x first). Then transpose so that ``hist_array[1][0]`` gives the 2nd x
    # value and the 1st y value, which is the same convention as used by root_numpy.
    hist_array = hist_array.reshape((y_axis.GetNbins(), x_axis.GetNbins())).T

    # Set all 0s to nan to get similar behavior to ROOT. In ROOT, it will basically ignore 0s. This is
    # especially important for log plots. Matplotlib doesn't handle 0s as well, since it attempts to
    # plot them and then will throw exceptions when the log is taken.
    # NOTE: This requires a few special functions later which ignore nan when calculating min and max.
    if set_zero_to_NaN:
        hist_array[hist_array == 0] = np.nan

    if return_bin_edges:
        # Use the bin edges directly rather than regenerating them with ``np.arange`` and the
        # width of the first bin. That approach is only valid for uniform binning and is sensitive
        # to floating point rounding at the upper end point (particularly for very narrow bins).
        # NOTE: Assumes ``get_bin_edges_from_axis`` returns the ordered low edges of each bin
        #       followed by the upper edge of the last bin - confirm against its definition.
        x_range = np.asarray(get_bin_edges_from_axis(x_axis), dtype=np.float64)
        y_range = np.asarray(get_bin_edges_from_axis(y_axis), dtype=np.float64)
    else:
        # We want an array of bin centers
        x_range = np.array([x_axis.GetBinCenter(i) for i in range(1, x_axis.GetNbins() + 1)])
        y_range = np.array([y_axis.GetBinCenter(i) for i in range(1, y_axis.GetNbins() + 1)])

    X, Y = np.meshgrid(x_range, y_range)

    return (X, Y, hist_array)
def _from_th1(hist: Hist) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
    """ Convert a TH1 histogram into the arrays and metadata needed to build a Histogram.

    Note:
        Underflow and overflow bins are excluded!

    Note:
        If the sumw2 structure has not been created yet, it is enabled on the passed histogram.

    Args:
        hist (ROOT.TH1): Input histogram.
    Returns:
        tuple: (bin_edges, y, errors_squared, metadata) where bin_edges are the x axis bin edges,
            y is the bin values, errors_squared are the squared bin errors, and metadata contains
            the values derived from the histogram stats.
    Raises:
        ValueError: If the histogram is not a TProfile and the first sumw2 value doesn't match
            the square of the first bin error.
    """
    # Enable sumw2 if it's not already calculated
    if hist.GetSumw2N() == 0:
        hist.Sumw2(True)

    x_axis = hist.GetXaxis()
    # Bin numbers 1..N, which skips the underflow (0) and overflow (N + 1) bins.
    in_range_bins = range(1, x_axis.GetNbins() + 1)

    bin_edges = get_bin_edges_from_axis(x_axis)
    # NOTE: The y value and bin error are stored with the hist, not the axis.
    y = np.array([hist.GetBinContent(i) for i in in_range_bins])
    # The sumw2 array includes the under/overflow bins, so trim the first and last entries.
    errors = np.array(hist.GetSumw2())[1:-1]

    # A TProfile is identified via its ``BuildOptions`` attribute. In that case the errors
    # are retrieved manually because the Sumw2() values are not the anticipated errors.
    is_profile = hasattr(hist, "BuildOptions")
    if is_profile:
        # Square the bin errors, since errors squared are expected downstream.
        errors = np.array([hist.GetBinError(i) for i in in_range_bins]) ** 2
    elif not np.isclose(errors[0], hist.GetBinError(1)**2):
        # Sanity check. If they don't match, something odd has almost certainly occurred.
        raise ValueError("Sumw2 errors don't seem to represent bin errors!")

    # Retrieve the stats and store them in the metadata.
    # They are useful for calculating histogram properties (mean, variance, etc).
    # The buffer is handed to ROOT as a ctypes array and read back afterwards. In order,
    # the four values are (each one is a single float):
    #   total_sum_w: Sum of weights (equal to np.sum(y) if unscaled)
    #   total_sum_w2: Sum of weights squared (equal to np.sum(errors_squared) if unscaled)
    #   total_sum_wx: Sum of w*x
    #   total_sum_wx2: Sum of w*x*x
    stats = np.zeros(4, dtype=np.float64)
    hist.GetStats(np.ctypeslib.as_ctypes(stats))

    metadata: Dict[str, Any] = {}
    metadata.update(_create_stats_dict_from_values(*stats))

    return (bin_edges, y, errors, metadata)