def get_best_binning(histogram_information, p_min, s_min, n_min, min_width, x_min=None):
    """
    Find one set of bin edges for all histograms and record its quality.

    Bins are created one after another with ``get_next_end``, which receives
    all histograms together with ``p_min``, ``s_min``, ``n_min`` and
    ``min_width``, until the last bin of the first histogram's x axis has
    been reached. Afterwards the last bin is checked for every histogram in
    turn: if its purity, stability or number of events is below the
    requirement (and there are more than three bins) it is merged into the
    previous bin.

    Parameters:
        histogram_information: list of dicts with the keys "hist" (a 2D
            histogram, it is handed to ``rebin_2d``), "channel" and "CoM".
            Each dict is updated in place with "p_i", "s_i" and "N".
        p_min: minimal purity required for a bin.
        s_min: minimal stability required for a bin.
        n_min: minimal number of events required for a bin.
        min_width: forwarded unchanged to ``get_next_end``.
        x_min: optional x value at which the first bin starts. ``None``
            starts at the first bin of the axis.

    Returns:
        Tuple ``(bin_edges, histogram_information)``.
    """

    def evaluate(info, edges, name=None):
        # purities, stabilities and x-projected event counts for one histogram
        rebinned = rebin_2d(info["hist"], edges, edges)
        new_hist = rebinned.Clone(name) if name is not None else rebinned.Clone()
        get_bin_content = new_hist.ProjectionX().GetBinContent
        purities = calculate_purities(new_hist.Clone())
        stabilities = calculate_stabilities(new_hist.Clone())
        n_events = [int(get_bin_content(i)) for i in range(1, len(edges))]
        return purities, stabilities, n_events

    histograms = [info["hist"] for info in histogram_information]
    first_hist = histograms[0]
    x_axis = first_hist.GetXaxis()
    n_bins = first_hist.GetNbinsX()

    current_bin_start = 0
    # compare against None so that x_min = 0 is honoured as well
    if x_min is not None:
        current_bin_start = first_hist.ProjectionX().FindBin(x_min) - 1
    current_bin_end = current_bin_start

    bin_edges = []
    while current_bin_end < n_bins:
        current_bin_end, _, _, _ = get_next_end(
            histograms, current_bin_start, current_bin_end, p_min, s_min, n_min, min_width
        )
        if not bin_edges:
            # the very first edge is the lower edge of the first bin used
            bin_edges.append(x_axis.GetBinLowEdge(current_bin_start + 1))
        # upper edge of the last original bin that went into the new bin
        bin_edges.append(x_axis.GetBinLowEdge(current_bin_end) + x_axis.GetBinWidth(current_bin_end))
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    for info in histogram_information:
        name = info["channel"] + "_" + str(info["CoM"])
        purities, stabilities, n_events = evaluate(info, bin_edges, name)
        # Now check if the last bin also fulfils the requirements
        last_bin_fails = purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min
        if last_bin_fails and len(purities) > 3:
            # if not, merge last two bins (drop the second to last edge)
            bin_edges = bin_edges[:-2] + bin_edges[-1:]
            purities, stabilities, n_events = evaluate(info, bin_edges)
        info["p_i"] = purities
        info["s_i"] = stabilities
        info["N"] = n_events

    # A merge caused by a later histogram changes the binning after earlier
    # histograms were evaluated; bring those back in line with the final edges.
    for info in histogram_information:
        if len(info["N"]) != len(bin_edges) - 1:
            info["p_i"], info["s_i"], info["N"] = evaluate(info, bin_edges)

    return bin_edges, histogram_information
def _evaluate_binning(hist, bin_edges, name=None):
    '''
        Rebin hist with bin_edges on both axes and return
        (purities, stabilities, n_events) for the new bins.
        n_events are the integer bin contents of the x projection.
    '''
    rebinned = rebin_2d(hist, bin_edges, bin_edges)
    if name is None:
        new_hist = rebinned.Clone()
    else:
        new_hist = rebinned.Clone(name)
    get_bin_content = new_hist.ProjectionX().GetBinContent
    purities = calculate_purities(new_hist.Clone())
    stabilities = calculate_stabilities(new_hist.Clone())
    n_events = [int(get_bin_content(i)) for i in range(1, len(bin_edges))]
    return purities, stabilities, n_events


def get_best_binning(histogram_information, p_min, s_min, n_min, min_width, x_min=None):
    '''
        Determine common bin edges for all given histograms.

        New bins are requested from get_next_end (which is given all
        histograms and the requirements p_min, s_min, n_min, min_width)
        until the end of the x axis of the first histogram is reached.
        The last bin is then checked histogram by histogram and merged
        with the previous one if it has a purity below p_min, a stability
        below s_min or fewer than n_min events (only if there are more
        than three bins).

        histogram_information: list of dicts with keys 'hist', 'channel'
            and 'CoM'; the keys 'p_i', 's_i' and 'N' are added in place.
        x_min: optional x value where the first bin starts
            (None = start of the axis).

        Returns (bin_edges, histogram_information).
    '''
    histograms = [info['hist'] for info in histogram_information]
    first_hist = histograms[0]
    n_bins = first_hist.GetNbinsX()

    # 'is not None' instead of truthiness: x_min = 0 is a valid start value
    if x_min is not None:
        current_bin_start = first_hist.ProjectionX().FindBin(x_min) - 1
    else:
        current_bin_start = 0
    current_bin_end = current_bin_start

    bin_edges = []
    while current_bin_end < n_bins:
        current_bin_end, _, _, _ = get_next_end(
            histograms, current_bin_start, current_bin_end,
            p_min, s_min, n_min, min_width)
        if not bin_edges:
            # lower edge of the first bin that is used
            bin_edges.append(first_hist.GetXaxis().GetBinLowEdge(current_bin_start + 1))
        # upper edge = low edge + width of the last original bin included
        bin_edges.append(first_hist.GetXaxis().GetBinLowEdge(current_bin_end)
                         + first_hist.GetXaxis().GetBinWidth(current_bin_end))
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    n_merges = 0
    for info in histogram_information:
        purities, stabilities, n_events = _evaluate_binning(
            info['hist'], bin_edges, info['channel'] + '_' + str(info['CoM']))
        # Now check if the last bin also fulfils the requirements
        last_bin_ok = (purities[-1] >= p_min and stabilities[-1] >= s_min
                       and n_events[-1] >= n_min)
        if not last_bin_ok and len(purities) > 3:
            # if not, merge last two bins by removing the edge between them
            bin_edges = bin_edges[:-2] + bin_edges[-1:]
            n_merges += 1
            purities, stabilities, n_events = _evaluate_binning(info['hist'], bin_edges)
        info['p_i'] = purities
        info['s_i'] = stabilities
        info['N'] = n_events

    if n_merges:
        # histograms evaluated before the last merge still carry values for
        # the old edges; recompute them so they match the returned binning
        n_final_bins = len(bin_edges) - 1
        for info in histogram_information:
            if len(info['N']) != n_final_bins:
                info['p_i'], info['s_i'], info['N'] = _evaluate_binning(info['hist'], bin_edges)

    return bin_edges, histogram_information
def get_best_binning(histogram_information, p_min, s_min, n_min):
    '''
        Determine common bin edges for all given histograms.

        New bins are requested from get_next_end (which is given all
        histograms and the requirements p_min, s_min, n_min) until no more
        bins can be created, i.e. until the end of the x axis of the first
        histogram is reached. The last bin is then checked for each
        histogram and merged with the previous bin if its purity is below
        p_min, its stability below s_min or its number of events below n_min.

        histogram_information: list of dicts with keys 'hist', 'channel'
            and 'CoM'; the keys 'p_i', 's_i' and 'N' are added in place.
            'N' holds the integer diagonal bin contents (i, i) of the
            rebinned histogram.

        Returns (bin_edges, histogram_information).
    '''

    def evaluate(hist, edges, name=None):
        # purities, stabilities and diagonal contents for the given edges
        rebinned = rebin_2d(hist, edges, edges)
        new_hist = rebinned.Clone(name) if name is not None else rebinned.Clone()
        get_bin_content = new_hist.GetBinContent
        purities = calculate_purities(new_hist.Clone())
        stabilities = calculate_stabilities(new_hist.Clone())
        n_events = [int(get_bin_content(i, i)) for i in range(1, len(edges))]
        return purities, stabilities, n_events

    histograms = [info['hist'] for info in histogram_information]
    first_hist = histograms[0]
    x_axis = first_hist.GetXaxis()
    n_bins = first_hist.GetNbinsX()

    bin_edges = []
    current_bin_start = 0
    current_bin_end = 0
    while current_bin_end < n_bins:
        current_bin_end, _, _, _ = get_next_end(
            histograms, current_bin_start, current_bin_end, p_min, s_min, n_min)
        if not bin_edges:
            # lower edge of the very first bin
            bin_edges.append(x_axis.GetBinLowEdge(current_bin_start + 1))
        # upper edge = low edge + width of the last original bin included
        bin_edges.append(x_axis.GetBinLowEdge(current_bin_end)
                         + x_axis.GetBinWidth(current_bin_end))
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    for info in histogram_information:
        name = info['channel'] + '_' + str(info['CoM'])
        purities, stabilities, n_events = evaluate(info['hist'], bin_edges, name)
        # Now check if the last bin also fulfils the requirements
        last_bin_fails = (purities[-1] < p_min or stabilities[-1] < s_min
                          or n_events[-1] < n_min)
        # merging needs at least two bins, otherwise a single edge would remain
        if last_bin_fails and len(bin_edges) > 2:
            # if not, merge last two bins by removing the edge between them
            bin_edges = bin_edges[:-2] + bin_edges[-1:]
            purities, stabilities, n_events = evaluate(info['hist'], bin_edges)
        info['p_i'] = purities
        info['s_i'] = stabilities
        info['N'] = n_events

    # a merge caused by a later histogram leaves earlier histograms with
    # values for the old edges; recompute them for the final binning
    for info in histogram_information:
        if len(info['N']) != len(bin_edges) - 1:
            info['p_i'], info['s_i'], info['N'] = evaluate(info['hist'], bin_edges)

    return bin_edges, histogram_information
def test_rebin_2d_not_on_boundaries(self):
    '''
        Rebin the diagonal histogram with edges that do not coincide with
        the original bin boundaries and compare every diagonal bin (i, i)
        with the expected content.
    '''
    rebinned = rebin_2d(
        self.diagonals,
        self.bin_edges_not_on_boundaries,
        self.bin_edges_not_on_boundaries,
    )
    last_bin = rebinned.nbins()
    for bin_index in range(1, last_bin + 1):
        # expected values are stored 0-based, histogram bins are 1-based
        self.assertEqual(
            rebinned.GetBinContent(bin_index, bin_index),
            self.result_not_on_boundaries[bin_index - 1],
            'histogram contents do not match',
        )
def setUp(self):
    '''
        Create the histograms, bin edges and expected diagonal contents
        used by the rebinning tests.
    '''
    # create histograms
    self.h1 = Hist(100, 0, 100, title='1D')
    self.h2 = Hist2D(60, 40, 100, 60, 40, 100)
    self.simple = Hist(100, 0, 100, title='1D')

    # fill the histograms with our distributions
    # (explicit loop: map() is lazy on Python 3 and would never call Fill)
    for value in x1:
        self.h1.Fill(value)
    # NOTE(review): np.arange(4).reshape(2, 2) is [[0, 1], [2, 3]], which is
    # not symmetric - confirm this covariance is intended.
    self.h2.fill_array(
        np.random.multivariate_normal(
            mean=(50, 50),
            cov=np.arange(4).reshape(2, 2),
            size=(int(1E6),)))
    self.bins = [40, 45, 50, 60, 65, 70, 75, 80, 100]

    # rebin them: once axis by axis, once with rebin_2d
    self.h2_rebinned = self.h2.rebinned(self.bins, axis=0)
    self.h2_rebinned = self.h2_rebinned.rebinned(self.bins, axis=1)
    self.h2_rebinned_2 = rebin_2d(self.h2, self.bins, self.bins)

    # we only test symmetric bins for now
    self.n_bins_x = 5
    self.n_bins_y = 5
    # only entries in diagonals, p = 1, s = 1 for all bins
    self.diagonals = Hist2D(self.n_bins_x, 0, 10, self.n_bins_y, 0, 10)
    # this creates
    # [0, 0, 0, 0, 1],
    # [0, 0, 0, 1, 0],
    # [0, 0, 1, 0, 0],
    # [0, 1, 0, 0, 0],
    # [1, 0, 0, 0, 0]
    for i in range(1, self.n_bins_x + 1):
        self.diagonals.SetBinContent(i, i, 1)

    # the following should result in
    # [0, 0, 2],
    # [0, 2, 0],
    # [1, 0, 0]
    self.bin_edges_nice = [0, 2, 6, 10]
    self.result_nice = [1, 2, 2]

    # the following should result in
    # [0, 0, 0, 2],
    # [0, 0, 2, 0]
    # [0, 1, 0, 0]
    # [0, 0, 0, 0]
    self.bin_edges_out_of_bound = [-2, 0, 2, 6, 20]
    self.result_out_of_bound = [0, 1, 2, 2]

    # the following should result in
    # [0, 0, 2],
    # [0, 1, 0],
    # [2, 0, 0]
    self.bin_edges_not_on_boundaries = [0, 3.5, 6, 20]
    self.result_not_on_boundaries = [2, 1, 2]

    # one entry of weight 1 at each integer value 0..99
    for i in range(100):
        self.simple.Fill(i, 1)
def setUp(self):
    '''
        Fill one 2D histogram with random values, build bin edges with
        pick_bins.get_next_end and store the purities, stabilities and
        event counts both as returned by get_next_end (*_Integral) and as
        calculated from the rebinned histogram (*_GetBinContent).
    '''
    # create and fill the histogram
    n_entries = 100000
    self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
    x_values = 60 + 10 * np.random.randn(n_entries)
    y_values = x_values + np.random.randn(n_entries)
    self.h1.fill_array(np.vstack((x_values, y_values)).T)
    self.h1 = fix_overflow(self.h1)

    self.histogram_information = [
        {'hist': self.h1, 'CoM': 7, 'channel': 'test_1'},
    ]
    self.histograms = [entry['hist'] for entry in self.histogram_information]

    # requirements for new binning
    self.p_min = 0.5
    self.s_min = 0.5
    self.n_min = 1000

    self.bin_edges = []
    self.purities_GetBinContent = []
    self.stabilities_GetBinContent = []
    self.n_events_GetBinContent = []
    self.purities_Integral = []
    self.stabilities_Integral = []
    self.n_events_Integral = []

    first_hist = self.histograms[0]
    x_axis = first_hist.GetXaxis()
    n_bins = first_hist.GetNbinsX()

    start = 0
    end = 0
    while end < n_bins:
        end, purity, stability, n_gen_and_reco = pick_bins.get_next_end(
            self.histograms, start, end, self.p_min, self.s_min, self.n_min)
        if len(self.bin_edges) == 0:
            # first pass: store the lower edge of the first bin
            self.bin_edges.append(x_axis.GetBinLowEdge(start + 1))
        upper_edge = x_axis.GetBinLowEdge(end) + x_axis.GetBinWidth(end)
        self.bin_edges.append(upper_edge)
        self.purities_Integral.append(purity)
        self.stabilities_Integral.append(stability)
        self.n_events_Integral.append(n_gen_and_reco)
        start = end

    # same quantities, this time taken from the rebinned histogram
    self.h1_rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)
    self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
    self.stabilities_GetBinContent = calculate_stabilities(self.h1_rebinned)
    diagonal_content = self.h1_rebinned.GetBinContent
    self.n_events_GetBinContent = [
        int(diagonal_content(index, index))
        for index in range(1, len(self.bin_edges))
    ]
def makePurityStabilityPlots(input_file, histogram, bin_edges, channel, variable, isVisiblePhaseSpace):
    '''
        Plot purity and stability per bin for one histogram.

        The histogram is read from input_file, rebinned on both axes with
        bin_edges and its purities and stabilities are drawn into one
        figure. The figure is written to 'test.pdf' and, for every entry of
        the module-level output_formats, to
        output_folder + 'purityStability_<channel>_<variable>_<CoM>TeV.<format>',
        where CoM is read from the module-level options.

        input_file: file handed to get_histogram_from_file
        histogram: histogram name/path handed to get_histogram_from_file
        bin_edges: bin edges used for both axes and for the x limits
        channel, variable: used in the output file name; variable is also
            the key into variables_latex for the x axis title
        isVisiblePhaseSpace: not used in this function
    '''
    hist = get_histogram_from_file(histogram, input_file)
    # single-argument print(...) is valid on Python 2 and 3 alike;
    # the two spaces reproduce the output of the former print statement
    print("bin edges contents :  %s" % (bin_edges,))
    new_hist = rebin_2d(hist, bin_edges, bin_edges).Clone()

    purities = calculate_purities(new_hist.Clone())
    stabilities = calculate_stabilities(new_hist.Clone())
    print("purities contents :  %s" % (purities,))
    print("stabilities contents :  %s" % (stabilities,))

    hist_stability = value_tuplelist_to_hist(stabilities, bin_edges)
    hist_purity = value_tuplelist_to_hist(purities, bin_edges)

    hist_purity.color = 'red'
    hist_stability.color = 'blue'
    hist_stability.linewidth = 4
    hist_purity.linewidth = 4

    x_limits = [bin_edges[0], bin_edges[-1]]
    y_limits = [0, 1]

    figure = plt.figure(figsize=(20, 16), dpi=200, facecolor='white')
    ax0 = plt.axes()
    ax0.minorticks_on()
    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12

    rplt.hist(hist_stability, stacked=False, axes=ax0, cmap=my_cmap, vmin=1, label='Stability')
    rplt.hist(hist_purity, stacked=False, axes=ax0, cmap=my_cmap, vmin=1, label='Purity')

    ax0.set_xlim(x_limits)
    ax0.set_ylim(y_limits)

    plt.tick_params(**CMS.axis_label_major)
    plt.tick_params(**CMS.axis_label_minor)

    x_title = '$' + variables_latex[variable] + '$ [GeV]'
    plt.xlabel(x_title, CMS.x_axis_title)

    plt.legend(loc=4, prop={'size': 40})

    plt.tight_layout()

    # NOTE(review): fixed file name in the working directory, looks like a
    # debugging leftover - confirm whether it is still needed
    plt.savefig('test.pdf')
    save_as_name = 'purityStability_' + channel + '_' + variable + '_' + str(options.CoM) + 'TeV'
    for output_format in output_formats:
        plt.savefig(output_folder + save_as_name + '.' + output_format)

    # pyplot keeps every figure alive until it is closed explicitly
    plt.close(figure)
def setUp(self):
    '''
        Create three randomly filled 2D histograms (h2 with a fifth of the
        events of h1, h3 with half the spread of h1), determine the best
        binning for several combinations of them and rebin accordingly.
    '''

    def empty_hist():
        return Hist2D(60, 40, 100, 60, 40, 100)

    def make_info(hist, channel):
        # a fresh dict every time: get_best_binning receives its own copy
        return {'hist': hist, 'CoM': 7, 'channel': channel}

    def best_edges(information):
        edges, _ = pick_bins.get_best_binning(
            information, self.p_min, self.s_min, self.n_min)
        return edges

    # create histograms
    self.h1 = empty_hist()
    self.h2 = empty_hist()
    self.h3 = empty_hist()

    n_full = 10000
    n_reduced = int(n_full / 5)
    # all x samples are drawn before the y samples
    x_full = 60 + 10 * np.random.randn(n_full)
    x_reduced = 60 + 10 * np.random.randn(n_reduced)
    x_narrow = 60 + 5 * np.random.randn(n_full)
    y_full = x_full + np.random.randn(n_full)
    y_reduced = x_reduced + np.random.randn(n_reduced)
    y_narrow = x_narrow + np.random.randn(n_full)

    # fill the histograms with our distributions:
    # h1 is the reference, h2 has a reduced number of events,
    # h3 has a reduced spread
    samples = (
        (self.h1, x_full, y_full),
        (self.h2, x_reduced, y_reduced),
        (self.h3, x_narrow, y_narrow),
    )
    points = [np.vstack((x, y)).T for _, x, y in samples]
    for (hist, _, _), xy in zip(samples, points):
        hist.fill_array(xy)

    self.histogram_information_1 = [make_info(self.h1, 'test_1')]
    self.histogram_information_2 = [make_info(self.h2, 'test_2')]
    self.histogram_information_3 = [make_info(self.h3, 'test_3')]
    self.histogram_information_1_2 = [
        make_info(self.h1, 'test_1'),
        make_info(self.h2, 'test_2'),
    ]
    self.histogram_information_1_3 = [
        make_info(self.h1, 'test_1'),
        make_info(self.h3, 'test_3'),
    ]

    # requirements for new binning
    self.p_min = 0.5
    self.s_min = 0.5
    self.n_min = 100

    self.bin_edges_1 = best_edges(self.histogram_information_1)
    self.bin_edges_2 = best_edges(self.histogram_information_2)
    self.bin_edges_3 = best_edges(self.histogram_information_3)
    self.bin_edges_1_2 = best_edges(self.histogram_information_1_2)
    self.bin_edges_1_3 = best_edges(self.histogram_information_1_3)

    self.h1_rebinned = rebin_2d(self.h1, self.bin_edges_1, self.bin_edges_1)
    self.h2_rebinned = rebin_2d(self.h2, self.bin_edges_2, self.bin_edges_2)
    self.h3_rebinned = rebin_2d(self.h3, self.bin_edges_3, self.bin_edges_3)
    # the combined binnings are applied to h1
    self.h1_2_rebinned = rebin_2d(self.h1, self.bin_edges_1_2, self.bin_edges_1_2)
    self.h1_3_rebinned = rebin_2d(self.h1, self.bin_edges_1_3, self.bin_edges_1_3)
def setUp(self):
    """
    Prepare histograms, binnings and rebinned histograms for the tests.

    h1, h2 and h3 are filled with random (x, y) pairs where y is x plus
    unit gaussian noise; h2 gets a fifth of the events of h1 and h3 uses
    half the spread of h1. The best binning is determined for h1, h2 and
    h3 separately and for the pairs (h1, h2) and (h1, h3).
    """
    # create histograms
    self.h1 = Hist2D(60, 40, 100, 60, 40, 100)
    self.h2 = Hist2D(60, 40, 100, 60, 40, 100)
    self.h3 = Hist2D(60, 40, 100, 60, 40, 100)

    n_1 = 10000
    n_2 = int(n_1 / 5)
    sizes = (n_1, n_2, n_1)
    spreads = (10, 10, 5)
    # x values for all three histograms first, then the y values
    x_samples = [
        60 + spread * np.random.randn(size)
        for spread, size in zip(spreads, sizes)
    ]
    y_samples = [
        x + np.random.randn(size) for x, size in zip(x_samples, sizes)
    ]
    z_samples = [np.vstack((x, y)).T for x, y in zip(x_samples, y_samples)]

    # fill the histograms with our distributions
    # (h2: reduced number of events, h3: reduced spread)
    for hist, z in zip((self.h1, self.h2, self.h3), z_samples):
        hist.fill_array(z)

    def info_for(*numbers):
        # new dicts on every call so that no two lists share an entry
        return [
            {
                'hist': getattr(self, 'h%d' % number),
                'CoM': 7,
                'channel': 'test_%d' % number,
            }
            for number in numbers
        ]

    self.histogram_information_1 = info_for(1)
    self.histogram_information_2 = info_for(2)
    self.histogram_information_3 = info_for(3)
    self.histogram_information_1_2 = info_for(1, 2)
    self.histogram_information_1_3 = info_for(1, 3)

    # requirements for new binning
    self.p_min, self.s_min, self.n_min = 0.5, 0.5, 100
    requirements = (self.p_min, self.s_min, self.n_min)

    self.bin_edges_1, _ = pick_bins.get_best_binning(
        self.histogram_information_1, *requirements)
    self.bin_edges_2, _ = pick_bins.get_best_binning(
        self.histogram_information_2, *requirements)
    self.bin_edges_3, _ = pick_bins.get_best_binning(
        self.histogram_information_3, *requirements)
    self.bin_edges_1_2, _ = pick_bins.get_best_binning(
        self.histogram_information_1_2, *requirements)
    self.bin_edges_1_3, _ = pick_bins.get_best_binning(
        self.histogram_information_1_3, *requirements)

    def rebin(hist, edges):
        return rebin_2d(hist, edges, edges)

    self.h1_rebinned = rebin(self.h1, self.bin_edges_1)
    self.h2_rebinned = rebin(self.h2, self.bin_edges_2)
    self.h3_rebinned = rebin(self.h3, self.bin_edges_3)
    # the combined binnings are applied to h1
    self.h1_2_rebinned = rebin(self.h1, self.bin_edges_1_2)
    self.h1_3_rebinned = rebin(self.h1, self.bin_edges_1_3)