def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, x_min = None ):
    '''
    Build the bin edges one bin at a time and attach per-bin values to every entry.

    Starting from bin 0 (or from the bin found for x_min when x_min is truthy),
    get_next_end is called repeatedly until the returned end bin reaches the
    number of x bins of the first histogram. Every entry of
    histogram_information is then rebinned with those edges. If the last bin of
    an entry has purity < p_min, stability < s_min or fewer than n_min events,
    and there are more than three bins, the last two bins are merged and the
    values are recomputed.

    Each entry receives the keys 'p_i', 's_i', 'N' and 'res'.
    Returns the tuple ( bin_edges, histogram_information ).
    '''
    def summarise( hist, n_edges ):
        # purities, stabilities and integer x-projection contents per bin
        bin_content = hist.ProjectionX().GetBinContent
        p_values = calculate_purities( hist.Clone() )
        s_values = calculate_stabilities( hist.Clone() )
        counts = [int( bin_content( i ) ) for i in range( 1, n_edges )]
        return p_values, s_values, counts

    hists = [entry['hist'] for entry in histogram_information]
    reference = hists[0]
    total_bins = reference.GetNbinsX()

    start = end = 0
    if x_min:
        start = end = reference.ProjectionX().FindBin( x_min ) - 1

    x_axis = reference.GetXaxis()
    edges = []
    resolutions = []
    while end < total_bins:
        # get_next_end returns ( bin_end, p, s, N_reco, resolution )
        end, _, _, _, resolution = get_next_end( hists, start, end, p_min, s_min, n_min, min_width )
        resolutions.append( resolution )
        if not edges:
            # very first pass: store the lower edge of the first bin
            edges.append( x_axis.GetBinLowEdge( start + 1 ) )
        edges.append( x_axis.GetBinLowEdge( end ) + x_axis.GetBinWidth( end ) )
        start = end

    # purity and stability values for the final binning
    for entry in histogram_information:
        hist_name = entry['channel'] + '_' + str( entry['CoM'] )
        rebinned = rebin_2d( entry['hist'], edges, edges ).Clone( hist_name )
        p_values, s_values, counts = summarise( rebinned, len( edges ) )

        # the last bin has to fulfil the requirements as well
        last_bin_fails = p_values[-1] < p_min or s_values[-1] < s_min or counts[-1] < n_min
        if last_bin_fails and len( p_values ) > 3:
            # merge the last two bins and evaluate again
            edges[-2] = edges[-1]
            edges = edges[:-1]
            rebinned = rebin_2d( entry['hist'], edges, edges ).Clone()
            p_values, s_values, counts = summarise( rebinned, len( edges ) )

        for key, value in ( ( 'p_i', p_values ), ( 's_i', s_values ), ( 'N', counts ), ( 'res', resolutions ) ):
            entry[key] = value

    return edges, histogram_information
 def test_pre_calculated_purity( self ):
     '''Purities of self.pre_calculated must be 0.2, then all 1, then 0.25.'''
     values = calculate_purities( self.pre_calculated )
     n_values = len( values )
     self.assertEqual( n_values, self.n_bins_x, 'Invalid number of purity terms' )
     # every bin between the first and the last one
     inner_values = values[1:-1]
     for value in inner_values:
         self.assertEqual( value, 1 )
     # edge bins
     self.assertEqual( values[0], 0.2 )
     self.assertEqual( values[-1], 0.25 )
 def test_pre_calculated_purity(self):
     """Check the purity list of self.pre_calculated: 0.2, 1, ..., 1, 0.25."""
     result = calculate_purities(self.pre_calculated)
     message = 'Invalid number of purity terms'
     self.assertEqual(len(result), self.n_bins_x, message)
     # all bins except the two edge bins
     for inner in result[1:-1]:
         self.assertEqual(inner, 1)
     # first and last bin
     self.assertEqual(result[0], 0.2)
     self.assertEqual(result[-1], 0.25)
    def test_random_elipse_purity( self ):
        '''Every purity of self.random_elipse lies strictly between 0 and 0.6.'''
        values = calculate_purities( self.random_elipse )
        self.assertEqual( len( values ), self.n_bins_x, 'Invalid number of purity terms' )

        # expected ceiling is ~0.5; allow for 10% error due to randomness
        upper_limit = 0.5 + 0.1
        for value in values:
            self.assertGreater( value, 0 )
            self.assertLess( value, upper_limit )
    def setUp(self):
        """Fill a random 2D histogram, pick bin edges with get_next_end and rebin it.

        The per-bin values returned by pick_bins.get_next_end are stored in the
        ``*_Integral`` lists; the values computed from the rebinned histogram
        are stored in the ``*_GetBinContent`` attributes.
        """
        # histogram filled with random (x, y) pairs, y = x + unit-normal noise
        hist = Hist2D(60, 40, 100, 60, 40, 100)
        n_entries = 100000
        x_values = 60 + 10 * np.random.randn(n_entries)
        y_values = x_values + np.random.randn(n_entries)
        hist.fill_array(np.vstack((x_values, y_values)).T)
        self.h1 = fix_overflow(hist)

        self.histogram_information = [{"hist": self.h1, "CoM": 7, "channel": "test_1"}]
        self.histograms = [entry["hist"] for entry in self.histogram_information]

        # thresholds the new binning has to satisfy
        self.p_min = 0.5
        self.s_min = 0.5
        self.n_min = 1000

        self.bin_edges = []
        self.purities_GetBinContent, self.stabilities_GetBinContent, self.n_events_GetBinContent = [], [], []
        self.purities_Integral, self.stabilities_Integral, self.n_events_Integral = [], [], []

        reference = self.histograms[0]
        last_bin = reference.GetNbinsX()
        x_axis = reference.GetXaxis()

        start = end = 0
        while end < last_bin:
            end, purity, stability, n_events = pick_bins.get_next_end(
                self.histograms, start, end, self.p_min, self.s_min, self.n_min, 0
            )
            if not self.bin_edges:
                # first pass: lower edge of the very first bin
                self.bin_edges.append(x_axis.GetBinLowEdge(start + 1))
            self.bin_edges.append(x_axis.GetBinLowEdge(end) + x_axis.GetBinWidth(end))
            self.purities_Integral.append(purity)
            self.stabilities_Integral.append(stability)
            self.n_events_Integral.append(n_events)
            start = end

        rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)
        self.h1_rebinned = rebinned

        self.purities_GetBinContent = calculate_purities(rebinned)
        self.stabilities_GetBinContent = calculate_stabilities(rebinned)
        # diagonal bin contents of the rebinned histogram
        n_edges = len(self.bin_edges)
        self.n_events_GetBinContent = [int(rebinned.GetBinContent(i, i)) for i in range(1, n_edges)]
def calculate_purity_stability(hist_info, bin_edges):
    '''
    Rebin hist_info['hist'] with bin_edges on both axes and return the
    tuple (purities, stabilities) calculated from the rebinned histogram.
    '''
    rebinned = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone()
    return calculate_purities(rebinned), calculate_stabilities(rebinned)
Exemplo n.º 7
0
def calculate_purity_stability(hist_info, bin_edges):
    '''
    Return (purities, stabilities) for the histogram stored under
    hist_info['hist'] after rebinning both axes to bin_edges.
    '''
    fine_hist = hist_info['hist']
    coarse_hist = rebin_2d(fine_hist, bin_edges, bin_edges).Clone()
    purities = calculate_purities(coarse_hist)
    stabilities = calculate_stabilities(coarse_hist)
    return purities, stabilities
    def test_random_elipse_purity(self):
        """Purities of self.random_elipse must all be > 0 and < 0.5 + 0.1."""
        result = calculate_purities(self.random_elipse)
        message = 'Invalid number of purity terms'
        self.assertEqual(len(result), self.n_bins_x, message)

        # nominal ceiling of ~0.5 plus 10% tolerance for randomness
        ceiling = 0.5 + 0.1
        for purity in result:
            self.assertGreater(purity, 0)
            self.assertLess(purity, ceiling)
Exemplo n.º 9
0
def makePurityStabilityPlots(input_file, histogram, bin_edges, channel, variable, isVisiblePhaseSpace):
    '''
    Plot purity and stability per bin for one histogram and save the figure.

    The histogram named `histogram` is read from `input_file`, rebinned to
    `bin_edges` on both axes, and its purities and stabilities are drawn on a
    single axis with y range [0, 1]. The figure is written to 'test.pdf' and to
    output_folder + 'purityStability_<channel>_<variable>_<CoM>TeV.<format>'
    for every entry of the module-level `output_formats`.

    `variable` is also the key into `variables_latex` for the x-axis title.
    `isVisiblePhaseSpace` is not used in the body.
    Relies on module-level names: output_folder, output_formats, options,
    variables_latex, CMS, my_cmap.
    '''
    global output_folder, output_formats
 
    hist = get_histogram_from_file( histogram, input_file )
    print "bin edges contents   : ", bin_edges
    new_hist = rebin_2d( hist, bin_edges, bin_edges ).Clone()

    # get_bin_content = hist.ProjectionX().GetBinContent
    # separate clones are handed to each calculation
    purities = calculate_purities( new_hist.Clone() )
    stabilities = calculate_stabilities( new_hist.Clone() )
    # n_events = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )]
    print "purities contents    : ", purities
    print "stabilities contents : ", stabilities

    # convert the per-bin values into histograms with the same bin edges
    hist_stability = value_tuplelist_to_hist(stabilities, bin_edges)
    hist_purity = value_tuplelist_to_hist(purities, bin_edges)

    hist_purity.color = 'red'
    hist_stability.color = 'blue'

    hist_stability.linewidth = 4
    hist_purity.linewidth = 4

    x_limits = [bin_edges[0], bin_edges[-1]]
    y_limits = [0,1]
    # NOTE(review): the figure created here is never closed in this function
    plt.figure( figsize = ( 20, 16 ), dpi = 200, facecolor = 'white' )

    ax0 = plt.axes()
    ax0.minorticks_on()
#     ax0.grid( True, 'major', linewidth = 2 )
#     ax0.grid( True, 'minor' )
    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )
    ax0.xaxis.labelpad = 12
    ax0.yaxis.labelpad = 12
    rplt.hist( hist_stability , stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Stability' )
    rplt.hist( hist_purity, stacked=False, axes = ax0, cmap = my_cmap, vmin = 1, label = 'Purity' )

    ax0.set_xlim( x_limits )
    ax0.set_ylim( y_limits )

    # NOTE(review): same tick_params calls as above, repeated after drawing
    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )

    # the unit ' [GeV]' is appended for every variable
    x_title = '$' + variables_latex[variable] + '$ [GeV]'
    plt.xlabel( x_title, CMS.x_axis_title )

    # NOTE(review): `leg` is not used afterwards
    leg = plt.legend(loc=4,prop={'size':40})

    plt.tight_layout()

    # NOTE(review): fixed relative path, overwritten on every call - looks like a debugging leftover; confirm
    plt.savefig('test.pdf')
    save_as_name = 'purityStability_'+channel + '_' + variable + '_' + str(options.CoM) + 'TeV'
    # output_folder is concatenated directly, so it is presumably expected to end with a path separator
    for output_format in output_formats:
        plt.savefig( output_folder + save_as_name + '.' + output_format )
 def test_purities(self):
     """Every purity of self.h1_rebinned must be at least self.p_min."""
     threshold_checked = calculate_purities(self.h1_rebinned)
     for value in threshold_checked:
         self.assertGreaterEqual(value, self.p_min)
 def test_purities_reduced_spread(self):
     """Every purity of self.h3_rebinned must be at least self.p_min."""
     values = calculate_purities(self.h3_rebinned)
     for value in values:
         self.assertGreaterEqual(value, self.p_min)
def get_best_binning( histogram_information, p_min, s_min, n_min, min_width, nice_width, x_min = None, is_NJet=False, plot_resolution=False ):
    '''
    Determine bin edges bin by bin and store the per-bin values on every entry.

    Starting from bin 0 (or from the bin found for x_min when x_min is truthy),
    get_next_end is called until the returned end bin reaches the number of
    x bins of the first histogram. Each entry of histogram_information is then
    rebinned with those edges; if its last bin fails the purity, stability or
    event-count requirement (and there are more than three bins) the last two
    bins are merged and all values are recomputed for the merged binning.

    histogram_information : list of dicts with the keys 'hist', 'channel' and 'CoM'
    p_min, s_min, n_min   : minimal purity, stability and number of events
    min_width, nice_width : forwarded to get_next_end; nice_width is also used
                            to round the last bin edge
    x_min                 : optional x value at which the binning starts
    is_NJet               : forwarded to get_next_end
    plot_resolution       : forwarded to calculate_resolutions as res_to_plot

    Each entry receives the keys 'p_i', 's_i', 'N' and 'res'.
    Returns the tuple ( bin_edges, histogram_information ).
    '''
    histograms  = [info['hist'] for info in histogram_information]
    bin_edges   = []

    current_bin_start = 0
    current_bin_end = 0

    first_hist = histograms[0]
    n_bins     = first_hist.GetNbinsX()

    # Start at minimum x instead of 0
    if x_min:
        current_bin_start = first_hist.ProjectionX().FindBin(x_min) - 1
        current_bin_end = current_bin_start

    # Calculate the bin edges until no more bins can be iterated over
    while current_bin_end < n_bins:
        # Return the next bin end + (p, s, N_reco, res)
        current_bin_end, _, _, _, _ = get_next_end( histogram_information, current_bin_start, current_bin_end, p_min, s_min, n_min, min_width, nice_width, is_NJet=is_NJet)

        # Attach first bin low edge
        if not bin_edges:
            bin_edges.append( first_hist.GetXaxis().GetBinLowEdge( current_bin_start + 1 ) )
        # Attach the upper edge of the current bin
        bin_edges.append( first_hist.GetXaxis().GetBinLowEdge( current_bin_end ) + first_hist.GetXaxis().GetBinWidth( current_bin_end ) )
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    for hist_info in histogram_information:
        new_hist            = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone( hist_info['channel'] + '_' + str( hist_info['CoM'] ) )
        get_bin_content     = new_hist.ProjectionX().GetBinContent
        purities            = calculate_purities( new_hist.Clone() )
        stabilities         = calculate_stabilities( new_hist.Clone() )
        # NOTE(review): `var` is not defined in this function; presumably a module-level global - confirm
        resolutions         = calculate_resolutions(  var, bin_edges=bin_edges, channel = hist_info['channel'], res_to_plot = plot_resolution )

        n_events            = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )]

        # Now check if the last bin also fulfils the requirements
        if ( purities[-1] < p_min or stabilities[-1] < s_min or n_events[-1] < n_min ) and len(purities) > 3:
            # Merge last two bins
            bin_edges[-2]   = bin_edges[-1]
            bin_edges       = bin_edges[:-1]

            # Recalculate everything for the merged binning
            new_hist        = rebin_2d( hist_info['hist'], bin_edges, bin_edges ).Clone()
            # Rebind to the merged histogram: the previous bound method still
            # reads the pre-merge projection and would give a stale last bin
            get_bin_content = new_hist.ProjectionX().GetBinContent
            purities        = calculate_purities( new_hist.Clone() )
            stabilities     = calculate_stabilities( new_hist.Clone() )
            resolutions     = calculate_resolutions(  var, bin_edges=bin_edges, channel = hist_info['channel'], res_to_plot = plot_resolution )
            n_events        = [int( get_bin_content( i ) ) for i in range( 1, len( bin_edges ) )]

        # Make sure last bin edge is also a nice rounded number
        # (applied after the values above were calculated)
        if bin_edges[-1] % nice_width != 0:
            bin_edges[-1] = nice_width * round(bin_edges[-1]/nice_width)

        # Add purities, stabilities, n_events and resolutions to the histogram information
        hist_info['p_i'] = purities
        hist_info['s_i'] = stabilities
        hist_info['N']   = n_events
        hist_info['res'] = resolutions

    return bin_edges, histogram_information
Exemplo n.º 13
0
 def test_purities_combined_1_3(self):
     """Every purity of self.h1_3_rebinned must be at least self.p_min."""
     values = calculate_purities(self.h1_3_rebinned)
     for value in values:
         self.assertGreaterEqual(value, self.p_min)
    def setUp(self):
        """Prepare a randomly filled 2D histogram and its rebinned version.

        Bin edges are collected by calling pick_bins.get_next_end until the end
        of the x axis is reached; the values it returns go into the
        ``*_Integral`` lists, while the ``*_GetBinContent`` attributes are
        computed from the rebinned histogram.
        """
        # random (x, y) sample: y is x plus unit-normal noise
        sample_size = 100000
        histogram = Hist2D(60, 40, 100, 60, 40, 100)
        xs = 60 + 10 * np.random.randn(sample_size)
        ys = xs + np.random.randn(sample_size)
        pairs = np.vstack((xs, ys)).T
        histogram.fill_array(pairs)
        self.h1 = fix_overflow(histogram)

        self.histogram_information = [
            {'hist': self.h1, 'CoM': 7, 'channel': 'test_1'},
        ]
        self.histograms = [
            item['hist'] for item in self.histogram_information
        ]

        # requirements for new binning
        self.p_min, self.s_min, self.n_min = 0.5, 0.5, 1000

        self.bin_edges = []
        self.purities_GetBinContent = []
        self.stabilities_GetBinContent = []
        self.n_events_GetBinContent = []
        self.purities_Integral = []
        self.stabilities_Integral = []
        self.n_events_Integral = []

        template = self.histograms[0]
        n_fine_bins = template.GetNbinsX()
        axis = template.GetXaxis()

        bin_start, bin_end = 0, 0
        while bin_end < n_fine_bins:
            bin_end, p_value, s_value, n_value = pick_bins.get_next_end(
                self.histograms, bin_start, bin_end,
                self.p_min, self.s_min, self.n_min, 0)
            if not self.bin_edges:
                # nothing stored yet: begin with the first lower edge
                self.bin_edges.append(axis.GetBinLowEdge(bin_start + 1))
            upper_edge = axis.GetBinLowEdge(bin_end) + axis.GetBinWidth(bin_end)
            self.bin_edges.append(upper_edge)
            self.purities_Integral.append(p_value)
            self.stabilities_Integral.append(s_value)
            self.n_events_Integral.append(n_value)
            bin_start = bin_end

        self.h1_rebinned = rebin_2d(self.h1, self.bin_edges, self.bin_edges)

        self.purities_GetBinContent = calculate_purities(self.h1_rebinned)
        self.stabilities_GetBinContent = calculate_stabilities(
            self.h1_rebinned)
        # contents of the diagonal bins (i, i)
        diagonal_content = self.h1_rebinned.GetBinContent
        self.n_events_GetBinContent = [
            int(diagonal_content(i, i))
            for i in range(1, len(self.bin_edges))
        ]
 def test_best_case_purity( self ):
     '''All purities of self.best_case must equal 1, one per x bin.'''
     values = calculate_purities( self.best_case )
     n_values = len( values )
     self.assertEqual( n_values, self.n_bins_x, 'Invalid number of purity terms' )
     for value in values:
         self.assertEqual( value, 1 )
Exemplo n.º 16
0
def get_best_binning(histogram_information,
                     p_min,
                     s_min,
                     n_min,
                     min_width,
                     nice_width,
                     x_min=None,
                     is_NJet=False,
                     plot_resolution=False):
    '''
    Determine bin edges bin by bin and store the per-bin values on every entry.

    Starting from bin 0 (or from the bin found for x_min when x_min is truthy),
    get_next_end is called until the returned end bin reaches the number of
    x bins of the first histogram. Each entry of histogram_information is then
    rebinned with those edges; if its last bin fails the purity, stability or
    event-count requirement (and there are more than three bins) the last two
    bins are merged and all values are recomputed for the merged binning.

    histogram_information : list of dicts with the keys 'hist', 'channel' and 'CoM'
    p_min, s_min, n_min   : minimal purity, stability and number of events
    min_width, nice_width : forwarded to get_next_end; nice_width is also used
                            to round the last bin edge
    x_min                 : optional x value at which the binning starts
    is_NJet               : forwarded to get_next_end
    plot_resolution       : forwarded to calculate_resolutions as res_to_plot

    Each entry receives the keys 'p_i', 's_i', 'N' and 'res'.
    Returns the tuple (bin_edges, histogram_information).
    '''
    histograms = [info['hist'] for info in histogram_information]
    bin_edges = []

    current_bin_start = 0
    current_bin_end = 0

    first_hist = histograms[0]
    n_bins = first_hist.GetNbinsX()

    # Start at minimum x instead of 0
    if x_min:
        current_bin_start = first_hist.ProjectionX().FindBin(x_min) - 1
        current_bin_end = current_bin_start

    # Calculate the bin edges until no more bins can be iterated over
    while current_bin_end < n_bins:
        # Return the next bin end + (p, s, N_reco, res)
        current_bin_end, _, _, _, _ = get_next_end(histogram_information,
                                                   current_bin_start,
                                                   current_bin_end,
                                                   p_min,
                                                   s_min,
                                                   n_min,
                                                   min_width,
                                                   nice_width,
                                                   is_NJet=is_NJet)

        # Attach first bin low edge
        if not bin_edges:
            bin_edges.append(
                first_hist.GetXaxis().GetBinLowEdge(current_bin_start + 1))
        # Attach the upper edge of the current bin
        bin_edges.append(first_hist.GetXaxis().GetBinLowEdge(current_bin_end) +
                         first_hist.GetXaxis().GetBinWidth(current_bin_end))
        current_bin_start = current_bin_end

    # add the purity and stability values for the final binning
    for hist_info in histogram_information:
        new_hist = rebin_2d(hist_info['hist'], bin_edges,
                            bin_edges).Clone(hist_info['channel'] + '_' +
                                             str(hist_info['CoM']))
        get_bin_content = new_hist.ProjectionX().GetBinContent
        purities = calculate_purities(new_hist.Clone())
        stabilities = calculate_stabilities(new_hist.Clone())
        # NOTE(review): `var` is not defined in this function; presumably a
        # module-level global - confirm
        resolutions = calculate_resolutions(var,
                                            bin_edges=bin_edges,
                                            channel=hist_info['channel'],
                                            res_to_plot=plot_resolution)

        n_events = [int(get_bin_content(i)) for i in range(1, len(bin_edges))]

        # Now check if the last bin also fulfils the requirements
        if (purities[-1] < p_min or stabilities[-1] < s_min
                or n_events[-1] < n_min) and len(purities) > 3:
            # Merge last two bins
            bin_edges[-2] = bin_edges[-1]
            bin_edges = bin_edges[:-1]

            # Recalculate everything for the merged binning
            new_hist = rebin_2d(hist_info['hist'], bin_edges,
                                bin_edges).Clone()
            # Rebind to the merged histogram: the previous bound method still
            # reads the pre-merge projection and would give a stale last bin
            get_bin_content = new_hist.ProjectionX().GetBinContent
            purities = calculate_purities(new_hist.Clone())
            stabilities = calculate_stabilities(new_hist.Clone())
            resolutions = calculate_resolutions(var,
                                                bin_edges=bin_edges,
                                                channel=hist_info['channel'],
                                                res_to_plot=plot_resolution)
            n_events = [
                int(get_bin_content(i)) for i in range(1, len(bin_edges))
            ]

        # Make sure last bin edge is also a nice rounded number
        # (applied after the values above were calculated)
        if bin_edges[-1] % nice_width != 0:
            bin_edges[-1] = nice_width * round(bin_edges[-1] / nice_width)

        # Add purities, stabilities, n_events and resolutions to the histogram information
        hist_info['p_i'] = purities
        hist_info['s_i'] = stabilities
        hist_info['N'] = n_events
        hist_info['res'] = resolutions

    return bin_edges, histogram_information
Exemplo n.º 17
0
 def test_purities_reduced_spread(self):
     """No purity of self.h3_rebinned may fall below self.p_min."""
     for purity_value in calculate_purities(self.h3_rebinned):
         self.assertGreaterEqual(purity_value, self.p_min)
 def test_purities_combined_1_3(self):
     """No purity of self.h1_3_rebinned may fall below self.p_min."""
     for purity_value in calculate_purities(self.h1_3_rebinned):
         self.assertGreaterEqual(purity_value, self.p_min)
 def test_best_case_purity(self):
     """self.best_case must give one purity per x bin, each equal to 1."""
     result = calculate_purities(self.best_case)
     message = 'Invalid number of purity terms'
     self.assertEqual(len(result), self.n_bins_x, message)
     for purity_value in result:
         self.assertEqual(purity_value, 1)
Exemplo n.º 20
0
 def test_purities(self):
     """No purity of self.h1_rebinned may fall below self.p_min."""
     for purity_value in calculate_purities(self.h1_rebinned):
         self.assertGreaterEqual(purity_value, self.p_min)