def get_array_from_hist2D(hist: Hist, set_zero_to_NaN: bool = True, return_bin_edges: bool = False
                          ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """ Extract x, y, and bin values from a 2D ROOT histogram.

    Converts the histogram into a numpy array, and suitably processes it for a surface plot
    by removing 0s (which can cause problems when taking logs), and returning a set of (x, y) mesh
    values utilizing either the bin edges or bin centers.

    Note:
        This is a different format than the 1D version!

    Note:
        The mesh values come from ``np.meshgrid(x, y)``, so they have ``len(y)`` rows and ``len(x)``
        columns, while the returned hist data is indexed as ``hist_array[x_index][y_index]``
        (ie. it has one row per x bin).

    Args:
        hist (ROOT.TH2): Histogram to be converted.
        set_zero_to_NaN: If true, set 0 in the array to NaN. Useful with matplotlib so that it will
            ignore the values when plotting. See comments in this function for more details. Default: True.
        return_bin_edges: Return x and y using bin edges instead of bin centers. Default: False.
    Returns:
        Contains (x values, y values, numpy array of hist data) where (x, y) are values on a
            grid (from np.meshgrid) using the selected bin values.
    """
    x_axis = hist.GetXaxis()
    y_axis = hist.GetYaxis()
    n_x_bins = x_axis.GetNbins()
    n_y_bins = y_axis.GetNbins()

    # Walk over the global (linearized) bin numbers, skipping every under- and overflow cell.
    # Global bin 0 is always an underflow cell, so the loop can start from 1.
    hist_array = np.array([
        hist.GetBinContent(global_bin)
        for global_bin in range(1, hist.GetNcells())
        if not hist.IsBinUnderflow(global_bin) and not hist.IsBinOverflow(global_bin)
    ])
    # The hist_array was linear, so we need to shape it into our expected 2D values.
    # NOTE: The shape can be somewhat confusing (ie. one would naively expect to specify x first).
    #       The x bin index varies fastest in the linear array, so we need ``n_y_bins`` rows and
    #       ``n_x_bins`` columns.
    hist_array = hist_array.reshape((n_y_bins, n_x_bins))
    # Transpose the array to better match expectations.
    # In particular, by transposing the array, it means that ``hist_array[1][0]`` gives the 2nd x
    # value (x_index = 1) and the 1st y value (y_index = 0). This is as we would expect. This is also
    # the same convention as used by root_numpy.
    hist_array = hist_array.T

    # Set all 0s to nan to get similar behavior to ROOT. In ROOT, it will basically ignore 0s. This is
    # especially important for log plots. Matplotlib doesn't handle 0s as well, since it attempts to
    # plot them and then will throw exceptions when the log is taken.
    # By setting to nan, matplotlib basically ignores them similar to ROOT.
    # NOTE: This requires a few special functions later which ignore nan when calculating min and max.
    if set_zero_to_NaN:
        hist_array[hist_array == 0] = np.nan

    if return_bin_edges:
        # Use the bin edges of each axis directly. Rebuilding them with ``np.arange`` from the
        # width of the first bin is only valid for uniformly binned axes, and even then the number
        # of generated values depends on floating point round-off (and on an absolute epsilon
        # which breaks down once the bin width becomes comparable to it).
        # NOTE(review): Assumes ``get_bin_edges_from_axis`` returns all ``nbins + 1`` edges of the
        #       axis in ascending order - confirm against the helper.
        x_range = np.asarray(get_bin_edges_from_axis(x_axis), dtype=np.float64)
        y_range = np.asarray(get_bin_edges_from_axis(y_axis), dtype=np.float64)
    else:
        # We want an array of bin centers
        x_range = np.array([x_axis.GetBinCenter(i) for i in range(1, n_x_bins + 1)])
        y_range = np.array([y_axis.GetBinCenter(i) for i in range(1, n_y_bins + 1)])

    X, Y = np.meshgrid(x_range, y_range)

    return (X, Y, hist_array)
def _from_th1(hist: Hist) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
    """ Convert a TH1 histogram into the values needed to build a Histogram.

    Note:
        Underflow and overflow bins are excluded!

    Note:
        If sumw2 isn't already stored in the histogram, it is enabled on the passed histogram
        (ie. the input object is modified).

    Args:
        hist (ROOT.TH1): Input histogram.
    Returns:
        tuple: (bin_edges, y, errors_squared, metadata) where bin_edges are the bin edges of the x axis,
            y is the bin values, errors_squared are the squared bin errors (sumw2), and metadata
            contains the values built from the histogram stats by ``_create_stats_dict_from_values``.
    Raises:
        ValueError: If the hist is not a TProfile and the stored sumw2 value of the first bin doesn't
            match the square of its bin error.
    """
    # Enable sumw2 if it's not already calculated
    if hist.GetSumw2N() == 0:
        hist.Sumw2(True)

    # Don't include overflow
    bin_edges = get_bin_edges_from_axis(hist.GetXaxis())
    # Bins are numbered from 1 up to and including GetNbins(). 0 and GetNbins() + 1 are the
    # underflow and overflow bins.
    bins = range(1, hist.GetXaxis().GetNbins() + 1)
    # NOTE: The y value and bin error are stored with the hist, not the axis.
    y = np.array([hist.GetBinContent(i) for i in bins])
    errors = np.array(hist.GetSumw2())
    # Exclude the under/overflow bins
    errors = errors[1:-1]
    metadata: Dict[str, Any] = {}

    # Check for a TProfile.
    # In that case we need to retrieve the errors manually because the Sumw2() errors are
    # not the anticipated errors.
    if hasattr(hist, "BuildOptions"):
        errors = np.array([hist.GetBinError(i) for i in bins])
        # We expect errors squared
        errors = errors**2
    else:
        # Sanity check. If they don't match, something odd has almost certainly occurred.
        if not np.isclose(errors[0], hist.GetBinError(1)**2):
            raise ValueError("Sumw2 errors don't seem to represent bin errors!")

    # Retrieve the stats and store them in the metadata.
    # They are useful for calculating histogram properties (mean, variance, etc).
    # NOTE: ROOT writes into the buffer without checking its size, and the number of values
    #       depends on the hist class (a TH1 fills 4 values, but a TProfile fills 6). To avoid
    #       writing out of bounds, we allocate the maximum that ROOT will ever fill
    #       (``TH1::kNstat`` = 13) and then only use the first four values.
    max_n_stats = 13
    stats = np.zeros(max_n_stats, dtype=np.float64)
    hist.GetStats(np.ctypeslib.as_ctypes(stats))
    # The first four values are (each one is a single float):
    # [0]: total_sum_w: Sum of weights (equal to np.sum(y) if unscaled)
    # [1]: total_sum_w2: Sum of weights squared (equal to np.sum(errors_squared) if unscaled)
    # [2]: total_sum_wx: Sum of w*x
    # [3]: total_sum_wx2: Sum of w*x*x
    metadata.update(_create_stats_dict_from_values(*stats[:4]))

    return (bin_edges, y, errors, metadata)