def makeDataFrame(self):
    """Build the RDataFrame used for the double-fake measurement.

    The double-fake sample list returned by ``createSampleLists`` is passed
    through ``setSumWeights``; the ``tree.root`` file of *every* sample in
    that list is added to one ``TChain`` and the chain is wrapped in an
    ``RDataFrame``.  The event weight ``w``, ``ptCone`` and a set of
    absolute-value helper columns are defined on top of it.

    Returns:
        The RDataFrame node carrying all the extra columns.
    """
    # createSampleLists returns (all, single-fake, double-fake); only the
    # double-fake list is needed here.
    _, _, samples_doublefake = createSampleLists(
        analysis_dir=self.analysis_dir,
        server=self.server,
        channel=self.channel,
    )
    working_samples = setSumWeights(samples_doublefake)

    print('###########################################################')
    print('# measuring doublefakerake...')
    print('# %d samples to be used:'%(len(working_samples)))
    print('###########################################################')
    for w in working_samples:
        print('{:<20}{:<20}'.format(*[w.name,('path: '+w.ana_dir)]))

    # TChain'ing all data samples together.
    # The original loop always read working_samples[0], which chained the
    # first sample N times and never touched the others.
    chain = TChain('tree')
    for sample in working_samples:
        file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
        chain.Add(file_name)

    weight = 'weight * lhe_weight'
    dataframe = (
        RDataFrame(chain)
        .Define('w', weight)
        .Define('ptCone', self.ptCone())
        .Define('abs_hnl_hn_vis_eta', 'abs(hnl_hn_vis_eta)')
        .Define('abs_hnl_hn_eta', 'abs(hnl_hn_eta)')
        .Define('abs_l1_eta', 'abs(l1_eta)')
        .Define('abs_l2_eta', 'abs(l2_eta)')
        .Define('abs_l1_jet_flavour_parton', 'abs(l1_jet_flavour_parton)')
        .Define('abs_l2_jet_flavour_parton', 'abs(l2_jet_flavour_parton)')
    )

    return dataframe
def produceLightTree(sample='DY',ch='mmm'):
    """Write a slimmed tree with LOOSE/TIGHT flags for the measurement region.

    The input chain is turned into an RDataFrame, integer ``LOOSE`` and
    ``TIGHT`` flag columns and a ``ptcone`` column are defined, the number
    of loose and tight entries is printed, and a fixed list of branches is
    snapshotted to ``saveDir``.

    Args:
        sample: 'DY' chains ``DY`` and ``DY_ext``; 'data' chains the
            enabled data run periods of the chosen channel.
        ch: channel tag, 'mmm' or 'eem'.

    Raises:
        ValueError: if ``sample`` or ``ch`` is not one of the values above.
        NotImplementedError: if no loose/tight selection is configured for
            the channel (currently the case for 'eem').
    """
    # Fail early with a clear message instead of an UnboundLocalError deep
    # inside the function.
    if ch not in ('mmm', 'eem'):
        raise ValueError("unknown channel %r, expected 'mmm' or 'eem'" % (ch,))
    if sample not in ('DY', 'data'):
        raise ValueError("unknown sample %r, expected 'DY' or 'data'" % (sample,))

    loose_sel = None
    tight_l2_sel = None
    if ch == 'mmm':
        data_files = {
            'B': data_B_mmm + suffix,
            'C': data_C_mmm + suffix,
            'D': data_D_mmm + suffix,
            'E': data_E_mmm + suffix,
            'F': data_F_mmm + suffix,
        }
        loose_sel = SFR_MMM_012_L
        tight_l2_sel = l2_m_tight
    else:  # 'eem': only the input files are configured so far
        data_files = {
            'B': data_B_eem + suffix,
            'C': data_C_eem + suffix,
            'D': data_D_eem + suffix,
            'E': data_E_eem + suffix,
            'F': data_F_eem + suffix,
        }

    if loose_sel is None or tight_l2_sel is None:
        raise NotImplementedError(
            "no loose/tight selection is configured for channel %r" % (ch,))

    # Only run period B is enabled; add 'C'..'F' here to chain more periods.
    enabled_periods = ('B',)

    t = rt.TChain('tree')
    if sample == 'DY':
        t.Add(DY)
        t.Add(DY_ext)
    else:
        for period in enabled_periods:
            t.Add(data_files[period])

    # Formatted so the output is the same under Python 2 and Python 3.
    print('\n\ttotal entries: %s' % t.GetEntries())

    # Measurement-region cut shared by the LOOSE and TIGHT flags.
    mr_cut = loose_sel + ' && hnl_dr_12 > 0.3 && hnl_dr_02 > 0.3 && abs(hnl_m_01 - 91.19) < 10 && hnl_q_01 == 0'

    df = RDF(t)
    df1 = df.Define('LOOSE', '1 * (' + mr_cut + ' )')
    df2 = df1.Define('TIGHT', '1 * (' + mr_cut + ' && ' + tight_l2_sel + ')')

    # Book both counts before reading either, so RDataFrame fills them in a
    # single event loop instead of two.
    loose_count = df2.Filter('LOOSE == 1').Count()
    tight_count = df2.Filter('TIGHT == 1').Count()
    print('\n\tloose entries in MR: %s' % loose_count.GetValue())
    print('\n\ttight entries in MR: %s' % tight_count.GetValue())

    df2 = df2.Define('ptcone', PTCONEL2)

    branchList = rt.vector('string')()
    for br in ['event', 'lumi', 'run', 'LOOSE', 'TIGHT', 'l2_reliso_rho_03',
               'l2_Medium', 'l2_eta', 'l2_pt', 'l2_dxy', 'l2_dz', 'ptcone']:
        branchList.push_back(br)

    df2.Snapshot('tree', saveDir+'/%s_%s_6_24B_Lcut_29_4.root'%(sample,ch), branchList)
nargs='+', help="list of collections to make plots of") args = parser.parse_args() models = { 'm': (';m_{{{0}}} [GeV]; Events', 50, 0, 1000), 'pt': (';p_{{T,{0}}} [GeV]; Events', 50, 0, 1000), 'eta': (';#eta_{{{0}}}; Events', 15, -3., 3.), 'phi': (';#phi_{{{0}}}; Events', 20, -4., 4.) } outdir = 'plots' if args.output: outdir = args.output rdf = RDataFrame("CollectionTree", args.infile) rdf = rdf.Define('wgt', 'EventInfoAuxDyn.mcEventWeights[0]') canv = TCanvas('c', '', 800, 600) for coll in args.do: coll = coll.split(':') if len(coll) != 1 and len(coll) != 4: print('collection should either be "name" or "name:nbins:xmin:xmax"') continue varsuff = '' if 'fatjet' in coll[0]: cname = 'AntiKt10TruthTrimmedPtFrac5SmallR20JetsAux' elif 'jet' in coll[0]: cname = 'AntiKt4TruthDressedWZJetsAux' elif 'electron' in coll[0]:
def measureSFR(self, drawPlot = False):
    """Fill the loose and tight 2D histograms for the single-fake measurement.

    The single-fake sample list returned by ``createSampleLists`` is passed
    through ``setSumWeights``; the ``tree.root`` file of *every* sample in
    that list is chained and wrapped in an ``RDataFrame``.  Two weighted
    2D histograms in (ptCone, |hnl_hn_vis_eta|) are filled: one for the
    'L_L_uncorrelated' selection and one that additionally requires 'T_T'.

    Args:
        drawPlot: when truthy, draw the resulting histogram on a canvas and
            stop in ``set_trace()`` afterwards.

    Returns:
        None.
    """
    # createSampleLists returns (all, single-fake, double-fake); only the
    # single-fake list is needed here.
    _, samples_singlefake, _ = createSampleLists(
        analysis_dir=self.analysis_dir,
        server=self.server,
        channel=self.channel,
    )
    working_samples = setSumWeights(samples_singlefake)

    print('###########################################################')
    print('# measuring singlefakerake...')
    print('# %d samples to be used:'%(len(working_samples)))
    print('###########################################################')
    for w in working_samples:
        print('{:<20}{:<20}'.format(*[w.name,('path: '+w.ana_dir)]))

    # TChain'ing all data samples together.
    # The original loop always read working_samples[0], which chained the
    # first sample N times and never touched the others.
    chain = TChain('tree')
    for sample in working_samples:
        file_name = '/'.join([sample.ana_dir, sample.dir_name, sample.tree_prod_name, 'tree.root'])
        chain.Add(file_name)

    weight = 'weight * lhe_weight'
    dataframe = (
        RDataFrame(chain)
        .Define('w', weight)
        .Define('ptCone', self.ptCone())
        .Define('abs_hnl_hn_vis_eta', 'abs(hnl_hn_vis_eta)')
        .Define('abs_hnl_hn_eta', 'abs(hnl_hn_eta)')
        .Define('abs_l1_eta', 'abs(l1_eta)')
        .Define('abs_l2_eta', 'abs(l2_eta)')
        .Define('abs_l1_jet_flavour_parton', 'abs(l1_jet_flavour_parton)')
        .Define('abs_l2_jet_flavour_parton', 'abs(l2_jet_flavour_parton)')
    )

    # Alternative ptCone binning with an extra last edge:
    # bins_ptCone = np.array([5.,10., 20., 30., 40.,70., 2000])
    bins_ptCone = np.array([5.,10., 20., 30., 40.,70.])
    bins_eta    = np.array([0., 0.8, 1.2, 2.4])

    selection_baseline = getSelection(self.channel,'MR_SF')
    selection_loose    = getSelection(self.channel,'L_L_uncorrelated')
    selection_tight    = getSelection(self.channel,'T_T')

    selection_LL_uncorrelated = '(' + ' & '.join([selection_baseline, selection_loose]) + ')'
    selection_TT_uncorrelated = '(' + ' & '.join([selection_baseline, selection_loose, selection_tight]) + ')'

    # Book both histograms before touching either result: the first access
    # below starts the event loop, which then fills both in one pass.
    h_LL_uncorrelated = dataframe\
        .Filter(selection_LL_uncorrelated)\
        .Histo2D(('h_LL_uncorrelated','h_LL_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')
    h_TT_uncorrelated = dataframe\
        .Filter(selection_TT_uncorrelated)\
        .Histo2D(('h_TT_uncorrelated','h_TT_uncorrelated',len(bins_ptCone)-1,bins_ptCone, len(bins_eta)-1, bins_eta),'ptCone','abs_hnl_hn_vis_eta','w')

    # name the axis, also initiate the dataframe call
    h_LL_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')
    h_TT_uncorrelated.SetTitle(';ptCone [GeV]; dimuon #eta')

    # preparing the histo and save it into a .root file
    sfr_TH2_dir = '/home/dehuazhu/HNL/CMSSW_9_4_6_patch1/src/PlotFactory/DataBkgPlots/modules/DDE_singlefake.root'
    # NOTE(review): the division by the loose histogram is disabled, so
    # sfr_hist is currently just a copy of the tight histogram.
    sfr_hist = h_TT_uncorrelated.Clone()
    # sfr_hist.Divide(h_LL_uncorrelated.Clone())
    # sfr_hist.SaveAs(sfr_TH2_dir) #uncomment this to save the TH2

    # draw the histo if required
    if drawPlot:
        can = TCanvas('can', '')
        # sfr_hist.Draw('colzTextE')
        # sfr_hist.Draw('colz')
        sfr_hist.Draw()
        pf.showlumi('%d entries'%(sfr_hist.GetEntries()))
        # pf.showlogopreliminary()
        can.Update()
        # Stops in the debugger after drawing (presumably to keep the
        # canvas open for inspection).
        set_trace()
lb=train + validation, ub=100) } ######## # Main # ######## if __name__ == '__main__': args = parse_input() gInterpreter.Declare('auto rand_gen = TRandom3({});'.format(args.seed)) init_frame = RDataFrame(args.tree, args.ntp) rand_frame = init_frame.Define('rand_split', 'rand_gen.Uniform(0, 100)') if args.debug: print('loaded {} with {} entries'.format( args.ntp, rand_frame.Count().GetValue())) cuts = get_cuts(args.train_ratio, args.validation_ratio) for sample, cut in cuts.items(): subsample_frame = rand_frame.Filter(cut) output_ntp = join(args.output_dir, '{}_{}.root'.format(get_filename(args.ntp), sample)) subsample_frame.Snapshot(args.tree, output_ntp) if args.debug: print('sample: {}, cuts: {}'.format(sample, cut))
# C++ helpers made available to RDataFrame expressions: getBin looks up the
# bin of (x, y) in a TH2D, getWeight returns that bin's content.
gInterpreter.Declare(
    ' Int_t getBin(Double_t x, Double_t y, TH2D* histo) {'
    ' return histo->FindFixBin(x, y);'
    ' }'
    ' auto getWeight(Double_t x, Double_t y, TH2D* histo) {'
    ' auto binIdx = getBin(x, y, histo);'
    ' return histo->GetBinContent(binIdx);'
    ' } '
)

# Open the weight-histogram file on the C++ side and fetch the TH2D, so that
# the name `histo` can be used inside the Define expression below.
gInterpreter.Declare(f'auto histoNtp = new TFile("{histoNtpN}", "read");')
gInterpreter.Declare(f'auto histo = dynamic_cast<TH2D*>(histoNtp->Get("{histoN}"));')

dfInit = RDataFrame(mcTreeN, mcNtpN)
# 'wjk_alt' is looked up per entry from the histogram; 'wt' is the product of
# it with the 'wpid' and 'wtrk' columns.
df = dfInit.Define('wjk_alt', 'getWeight(b_ownpv_ndof, ntracks, histo)')
df = df.Define('wt', 'wpid*wtrk*wjk_alt')

# NOTE: 'wjk_occ' comes from the existing ntuple; 'wjk_alt' is the column
# defined above.
mcRootBrs = df.AsNumpy(columns=['wjk_occ', 'wjk_alt'])
wtJkOccRoot = mcRootBrs['wjk_occ']
wtJkOccAltRoot = mcRootBrs['wjk_alt']

histoRootMdl = TH2DModel('histoRoot', 'histoRoot', 20, 1, 200, 20, 0, 450)
histoRoot = df.Histo2D(histoRootMdl, 'b_ownpv_ndof', 'ntracks', 'wt')

##################
# Histo w/ numpy #