示例#1
0
    def makeDataFrame(self):
        sample_dict = {}
        samples_all, samples_singlefake, samples_doublefake = createSampleLists(analysis_dir=self.analysis_dir, server = self.server, channel=self.channel)
        working_samples = samples_doublefake
        working_samples = setSumWeights(working_samples)
        print('###########################################################')
        print('# measuring doublefakerake...')
        print('# %d samples to be used:'%(len(working_samples)))
        print('###########################################################')
        for w in working_samples: print('{:<20}{:<20}'.format(*[w.name,('path: '+w.ana_dir)]))
        chain = TChain('tree') #TChain'ing all data samples together
        for i,s in enumerate(working_samples):
            sample = working_samples[0]
            file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
            chain.Add(file_name)
            
        dataframe = RDataFrame(chain)
        weight = 'weight * lhe_weight'
        dataframe = dataframe.Define('w',weight)\
                            .Define('ptCone',self.ptCone())\
                            .Define('abs_hnl_hn_vis_eta','abs(hnl_hn_vis_eta)')\
                            .Define('abs_hnl_hn_eta','abs(hnl_hn_eta)')\
                            .Define('abs_l1_eta','abs(l1_eta)')\
                            .Define('abs_l2_eta','abs(l2_eta)')\
                            .Define('abs_l1_jet_flavour_parton','abs(l1_jet_flavour_parton)')\
                            .Define('abs_l2_jet_flavour_parton','abs(l2_jet_flavour_parton)')\

        return dataframe
示例#2
0
def produceLightTree(sample='DY',ch='mmm'):
    if ch == 'mmm':
        d17B = data_B_mmm+suffix; d17C = data_C_mmm+suffix; d17D = data_D_mmm+suffix; d17E = data_E_mmm+suffix; d17F = data_F_mmm+suffix; 
        SFR_012_L = SFR_MMM_012_L
        l2_tight = l2_m_tight

    if ch == 'eem':
        d17B = data_B_eem+suffix; d17C = data_C_eem+suffix; d17D = data_D_eem+suffix; d17E = data_E_eem+suffix; d17F = data_F_eem+suffix; 

    t = rt.TChain('tree')

    if sample == 'DY':
        t.Add(DY)
        t.Add(DY_ext)

    if sample == 'data':
        t.Add(d17B)
       #t.Add(d17C)
       #t.Add(d17D)
       #t.Add(d17E)
       #t.Add(d17F)

    print '\n\ttotal entries:', t.GetEntries()

    df = RDF(t)
    df1 = df.Define('LOOSE', '1 * (' + SFR_012_L + ' && hnl_dr_12 > 0.3 && hnl_dr_02 > 0.3 && abs(hnl_m_01 - 91.19) < 10 && hnl_q_01 == 0 )' )
    df2 = df1.Define('TIGHT', '1 * (' + SFR_012_L + ' && hnl_dr_12 > 0.3 && hnl_dr_02 > 0.3 && abs(hnl_m_01 - 91.19) < 10 && hnl_q_01 == 0 && ' + l2_tight + ')' )

    num_L = df2.Filter('LOOSE == 1').Count().GetValue()
    print '\n\tloose entries in MR:', num_L 

    num_T = df2.Filter('TIGHT == 1').Count().GetValue()
    print '\n\ttight entries in MR:', num_T

    df2 = df2.Define('ptcone', PTCONEL2)

    branchList = rt.vector('string')()
    for br in ['event', 'lumi', 'run', 'LOOSE', 'TIGHT', 'l2_reliso_rho_03', 'l2_Medium', 'l2_eta', 'l2_pt', 'l2_dxy', 'l2_dz', 'ptcone']:
        branchList.push_back(br)
    df2.Snapshot('tree', saveDir+'/%s_%s_6_24B_Lcut_29_4.root'%(sample,ch), branchList)
示例#3
0
                    nargs='+',
                    help="list of collections to make plots of")
args = parser.parse_args()

models = {
    'm': (';m_{{{0}}} [GeV]; Events', 50, 0, 1000),
    'pt': (';p_{{T,{0}}} [GeV]; Events', 50, 0, 1000),
    'eta': (';#eta_{{{0}}}; Events', 15, -3., 3.),
    'phi': (';#phi_{{{0}}}; Events', 20, -4., 4.)
}
outdir = 'plots'
if args.output:
    outdir = args.output

rdf = RDataFrame("CollectionTree", args.infile)
rdf = rdf.Define('wgt', 'EventInfoAuxDyn.mcEventWeights[0]')

canv = TCanvas('c', '', 800, 600)

for coll in args.do:
    coll = coll.split(':')
    if len(coll) != 1 and len(coll) != 4:
        print('collection should either be "name" or "name:nbins:xmin:xmax"')
        continue

    varsuff = ''
    if 'fatjet' in coll[0]:
        cname = 'AntiKt10TruthTrimmedPtFrac5SmallR20JetsAux'
    elif 'jet' in coll[0]:
        cname = 'AntiKt4TruthDressedWZJetsAux'
    elif 'electron' in coll[0]:
示例#4
0
    def measureSFR(self, drawPlot = False):
        sample_dict = {}
        samples_all, samples_singlefake, samples_doublefake = createSampleLists(analysis_dir=self.analysis_dir, server = self.server, channel=self.channel)
        working_samples = samples_singlefake
        working_samples = setSumWeights(working_samples)
        print('###########################################################')
        print('# measuring singlefakerake...')
        print('# %d samples to be used:'%(len(working_samples)))
        print('###########################################################')
        for w in working_samples: print('{:<20}{:<20}'.format(*[w.name,('path: '+w.ana_dir)]))
        chain = TChain('tree') #TChain'ing all data samples together
        for i,s in enumerate(working_samples):
            sample = working_samples[0]
            file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
            chain.Add(file_name)
            
        dataframe = RDataFrame(chain)
        weight = 'weight * lhe_weight'
        dataframe = dataframe.Define('w',weight)\
                            .Define('ptCone',self.ptCone())\
                            .Define('abs_hnl_hn_vis_eta','abs(hnl_hn_vis_eta)')\
                            .Define('abs_hnl_hn_eta','abs(hnl_hn_eta)')\
                            .Define('abs_l1_eta','abs(l1_eta)')\
                            .Define('abs_l2_eta','abs(l2_eta)')\
                            .Define('abs_l1_jet_flavour_parton','abs(l1_jet_flavour_parton)')\
                            .Define('abs_l2_jet_flavour_parton','abs(l2_jet_flavour_parton)')\

        # bins_ptCone = np.array([5.,10., 20., 30., 40.,70., 2000])
        # bins_eta    = np.array([0., 0.8, 1.2, 2.4]) 
        bins_ptCone = np.array([5.,10., 20., 30., 40.,70.])
        bins_eta    = np.array([0., 0.8, 1.2, 2.4]) 

        selection_baseline      = getSelection(self.channel,'MR_SF')  

        selection_LL_uncorrelated = '(' + ' & '\
                                    .join([\
                                    selection_baseline,\
                                    getSelection(self.channel,'L_L_uncorrelated')\
                                    ]) + ')' 
        selection_TT_uncorrelated = '(' + ' & '\
                                    .join([\
                                    selection_baseline,\
                                    getSelection(self.channel,'L_L_uncorrelated'),\
                                    getSelection(self.channel,'T_T')\
                                    ]) + ')' 

        h_LL_uncorrelated = dataframe\
                .Filter(selection_LL_uncorrelated)\
                .Histo2D(('h_LL_uncorrelated','h_LL_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')
        #name the axis, also initiate the dataframe call
        h_LL_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')

        h_TT_uncorrelated = dataframe\
                .Filter(selection_TT_uncorrelated)\
                .Histo2D(('h_TT_uncorrelated','h_TT_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')
        #name the axis, also initiate the dataframe call
        h_TT_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')

        # preparing the histo and save it into a .root file
        sfr_TH2_dir = '/home/dehuazhu/HNL/CMSSW_9_4_6_patch1/src/PlotFactory/DataBkgPlots/modules/DDE_singlefake.root' 
        sfr_hist = h_TT_uncorrelated.Clone()
        # sfr_hist = h_LL_uncorrelated.Clone()
        # sfrhist = h_baseline.Clone()
        # sfr_hist.Divide(h_LL_uncorrelated.Clone())
        # dfr_hist.SaveAs(sfr_TH2_dir) #uncomment this to save the TH2

        # draw the histo if required 
        if drawPlot == True:
            can = TCanvas('can', '')
            # sfr_hist.Draw('colzTextE')
            # sfr_hist.Draw('colz')
            sfr_hist.Draw()
            pf.showlumi('%d entries'%(sfr_hist.GetEntries()))
            # pf.showlogopreliminary()
            can.Update()
            set_trace()
示例#5
0
                                              lb=train + validation,
                                              ub=100)
    }


########
# Main #
########

if __name__ == '__main__':
    args = parse_input()

    gInterpreter.Declare('auto rand_gen = TRandom3({});'.format(args.seed))

    init_frame = RDataFrame(args.tree, args.ntp)
    rand_frame = init_frame.Define('rand_split', 'rand_gen.Uniform(0, 100)')
    if args.debug:
        print('loaded {} with {} entries'.format(
            args.ntp,
            rand_frame.Count().GetValue()))

    cuts = get_cuts(args.train_ratio, args.validation_ratio)
    for sample, cut in cuts.items():
        subsample_frame = rand_frame.Filter(cut)
        output_ntp = join(args.output_dir,
                          '{}_{}.root'.format(get_filename(args.ntp), sample))

        subsample_frame.Snapshot(args.tree, output_ntp)

        if args.debug:
            print('sample: {}, cuts: {}'.format(sample, cut))
gInterpreter.Declare('''
Int_t getBin(Double_t x, Double_t y, TH2D* histo) {
  return histo->FindFixBin(x, y);
}

auto getWeight(Double_t x, Double_t y, TH2D* histo) {
  auto binIdx = getBin(x, y, histo);
  return histo->GetBinContent(binIdx);
}
''')

gInterpreter.Declare(f'auto histoNtp = new TFile("{histoNtpN}", "read");')
gInterpreter.Declare(f'auto histo = dynamic_cast<TH2D*>(histoNtp->Get("{histoN}"));')

dfInit = RDataFrame(mcTreeN, mcNtpN)
df = dfInit.Define('wjk_alt', 'getWeight(b_ownpv_ndof, ntracks, histo)').Define('wt', 'wpid*wtrk*wjk_alt')

# NOTE: This comes from the existing ntuple
mcRootBrs = df.AsNumpy(columns=['wjk_occ', 'wjk_alt'])
wtJkOccRoot = mcRootBrs['wjk_occ']
wtJkOccAltRoot = mcRootBrs['wjk_alt']

histoRootMdl = TH2DModel(
    'histoRoot', 'histoRoot',
    20, 1, 200, 20, 0, 450
)
histoRoot = df.Histo2D(histoRootMdl, 'b_ownpv_ndof', 'ntracks', 'wt')


##################
# Histo w/ numpy #