Пример #1
0
 def check(var, nbins, low, up, friend_var="Particle.{}", frame=df):
     """Compare one variable between ``frame`` and ``df_delphes``.

     Books a 1D histogram of column ``f<var>`` from ``frame`` and one of
     column ``friend_var.format(var)`` from ``df_delphes`` with the same
     binning (``nbins`` bins between ``low`` and ``up``). Both are overlaid
     on pad 1 of the canvas returned by ``canvas(var, diff=True)`` and their
     difference (first minus second) is drawn on pad 2.

     Returns True when every bin 1..GetNbinsX() of the difference histogram
     is exactly zero, False otherwise.
     """
     origin_title = f"{origin} {var}"
     h_origin = frame.Histo1D(
         RDF.TH1DModel(origin_title, origin_title, nbins, low, up), f"f{var}")
     friend_column = friend_var.format(var)
     friend_title = f"{friend} {friend_column}"
     h_friend = df_delphes.Histo1D(
         RDF.TH1DModel(friend_title, friend_title, nbins, low, up),
         friend_column)
     h_friend.SetLineColor(2)
     h_friend.SetLineStyle(3)

     can = canvas(var, diff=True)

     # Pad 1: overlay of the two distributions plus a legend.
     can.cd(1)
     h_origin.SetDirectory(0)
     h_friend.SetDirectory(0)
     drawn = (h_origin.DrawCopy(), h_friend.DrawCopy("same"))
     for drawn_hist in drawn:
         drawn_hist.SetDirectory(0)
     leg = TLegend(.7, .5, .9, .75)
     leg.AddEntry(h_origin.GetValue())
     leg.AddEntry(h_friend.GetValue())
     leg.Draw()
     gPad.Update()

     # Pad 2: bin-by-bin difference of the two histograms.
     can.cd(2)
     hdiff = h_origin.DrawCopy()
     hdiff.SetDirectory(0)
     hdiff.SetName("hdiff")
     hdiff.SetTitle("diff")
     hdiff.Add(h_friend.GetValue(), -1)
     hdiff.GetYaxis().SetRangeUser(-1, 1)
     gPad.Update()

     # Any non-zero bin means the two sources disagree.
     last_bin = hdiff.GetNbinsX()
     return not any(
         hdiff.GetBinContent(b) != 0 for b in range(1, last_bin + 1))
Пример #2
0
 def plot(frame, x):
     """Draw a 1D histogram of one column of ``frame`` on its own canvas.

     ``x`` is indexed as ``(column, nbins, low, up)``: the column name doubles
     as histogram name, x-axis title and canvas name.
     """
     name = f"{x[0]}"
     title = f";{x[0]}"
     model = RDF.TH1DModel(name, title, x[1], x[2], x[3])
     hist = frame.Histo1D(model, x[0])
     can = canvas(name)
     can.SetLeftMargin(0.15)
     hist.Draw("COLZ")
     can.Update()
Пример #3
0
def split(file_name):
    """Split the tree ``tree`` of a ROOT file into two halves on disk.

    ``file_name`` is the input path WITHOUT the ``.root`` extension. The first
    ``int(n/2)`` entries are written to ``<file_name>_training_half.root`` and
    the remaining ones to ``<file_name>_untouched_half.root``, both as a tree
    named ``tree``.
    """
    source = rt.TFile(file_name + '.root')
    tree = source.Get('tree')

    frame = RDF(tree)
    n_entries = tree.GetEntries()
    midpoint = int(n_entries / 2)

    # In RDataFrame.Range an end value of 0 means "up to the last entry".
    # NOTE(review): for fewer than two entries midpoint is 0, so the first
    # range also becomes (0, 0) and selects everything -- confirm this case
    # never occurs.
    first_half = frame.Range(0, midpoint)
    second_half = frame.Range(midpoint, 0)

    first_half.Snapshot('tree', '%s_training_half.root' % file_name)
    second_half.Snapshot('tree', '%s_untouched_half.root' % file_name)
Пример #4
0
 def correlate(frame, x, y):
     """Draw a 2D histogram of two columns of ``frame`` on its own canvas.

     ``x`` and ``y`` are each indexed as ``(column, nbins, low, up)``. The
     histogram and canvas are named ``<x column>_vs_<y column>`` and the axis
     titles are the column names.
     """
     name = f"{x[0]}_vs_{y[0]}"
     title = f";{x[0]};{y[0]}"
     model = RDF.TH2DModel(name, title,
                           x[1], x[2], x[3],
                           y[1], y[2], y[3])
     hist = frame.Histo2D(model, x[0], y[0])
     can = canvas(name)
     can.SetLeftMargin(0.15)
     hist.Draw("COLZ")
     can.Update()
Пример #5
0
def merge_friend(output_ntp_name, friends, tree_branch_dict, config):
    """Copy the trees marked ``keep`` in ``config`` into one output file.

    Parameters:
        output_ntp_name: path of the output file; it is opened in UPDATE mode
            for every snapshot.
        friends: mapping of tree path to the tree object to read from.
        tree_branch_dict: mapping of tree path to the value handed to
            ``make_output_vec`` to build the list of output branches.
        config: mapping of tree path to a dict with the keys ``keep`` and
            ``selection``.

    Trees whose ``keep`` entry is falsy are skipped. When
    ``concat_selections`` returns a non-empty cut it is applied as a filter
    before writing.
    """
    snapshot_opts = RDF.RSnapshotOptions()
    snapshot_opts.fMode = 'UPDATE'

    for full_path, tree in friends.items():
        tree_cfg = config[full_path]
        if not tree_cfg['keep']:
            continue

        frame = RDataFrame(tree)
        selection = concat_selections(tree_cfg['selection'])
        if selection:
            frame = frame.Filter(selection)

        branches = make_output_vec(tree_branch_dict[full_path])
        frame.Snapshot(full_path, output_ntp_name, branches, snapshot_opts)
Пример #6
0
def produceLightTree(sample='DY', ch='mmm'):
    """Write a reduced tree with LOOSE/TIGHT flags for one sample and channel.

    Parameters:
        sample: 'DY' chains the ``DY`` and ``DY_ext`` inputs; 'data' chains
            the period-B data file of the chosen channel.
        ch: channel tag, 'mmm' or 'eem'. It selects the data files and, for
            'mmm', the loose and tight selection strings.

    The number of chained entries and the LOOSE / TIGHT counts are printed.
    The selected branches are written as tree ``tree`` to
    ``saveDir + '/<sample>_<ch>_6_24B_Lcut_29_4.root'``.
    """
    if ch == 'mmm':
        d17B = data_B_mmm + suffix
        d17C = data_C_mmm + suffix
        d17D = data_D_mmm + suffix
        d17E = data_E_mmm + suffix
        d17F = data_F_mmm + suffix
        SFR_012_L = SFR_MMM_012_L
        l2_tight = l2_m_tight

    # NOTE(review): this branch does not assign SFR_012_L or l2_tight, so
    # ch='eem' fails with UnboundLocalError when the selection strings are
    # built below -- the matching 'eem' definitions still need to be added.
    if ch == 'eem':
        d17B = data_B_eem + suffix
        d17C = data_C_eem + suffix
        d17D = data_D_eem + suffix
        d17E = data_E_eem + suffix
        d17F = data_F_eem + suffix

    t = rt.TChain('tree')

    if sample == 'DY':
        t.Add(DY)
        t.Add(DY_ext)

    if sample == 'data':
        # Only period B is chained; the other periods are kept here disabled.
        t.Add(d17B)
       #t.Add(d17C)
       #t.Add(d17D)
       #t.Add(d17E)
       #t.Add(d17F)

    # Single-argument %-formatting prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print('\n\ttotal entries: %s' % t.GetEntries())

    # Part of the selection shared by the LOOSE and TIGHT flags.
    mr_cut = ('1 * (' + SFR_012_L
              + ' && hnl_dr_12 > 0.3 && hnl_dr_02 > 0.3'
              + ' && abs(hnl_m_01 - 91.19) < 10 && hnl_q_01 == 0')

    df = RDF(t)
    df1 = df.Define('LOOSE', mr_cut + ' )')
    df2 = df1.Define('TIGHT', mr_cut + ' && ' + l2_tight + ')')

    num_L = df2.Filter('LOOSE == 1').Count().GetValue()
    print('\n\tloose entries in MR: %s' % num_L)

    num_T = df2.Filter('TIGHT == 1').Count().GetValue()
    print('\n\ttight entries in MR: %s' % num_T)

    df2 = df2.Define('ptcone', PTCONEL2)

    # Snapshot takes the output columns as a C++ vector of strings.
    branchList = rt.vector('string')()
    for br in ['event', 'lumi', 'run', 'LOOSE', 'TIGHT', 'l2_reliso_rho_03',
               'l2_Medium', 'l2_eta', 'l2_pt', 'l2_dxy', 'l2_dz', 'ptcone']:
        branchList.push_back(br)
    df2.Snapshot('tree',
                 saveDir + '/%s_%s_6_24B_Lcut_29_4.root' % (sample, ch),
                 branchList)
Пример #7
0
    def __init__(self, name, label, selection, datacard_name, colour,
                 position_in_stack, basedir, postfix, isdata, ismc, issignal,
                 weight, xs):
        """Load one sample into a pandas DataFrame and compute its scaling.

        All arguments are stored as attributes of the same name. The tree
        ``tree`` is read from ``<basedir>/<name>/<postfix>``, filtered with
        ``selection`` and stored as ``self.df``.

        For non-data samples the event count is taken from the line
        containing 'Sum Norm Weights' in
        ``<basedir>/<name>/SkimAnalyzerCount/SkimReport.txt``; if no such line
        exists ``self.nevents`` stays at 1. ``self.lumi_scaling`` is 1 for
        data and ``xs / nevents`` otherwise.
        """
        self.name = name
        # Single-argument %-formatting prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('loading %s' % self.name)
        self.label = label
        self.selection = selection
        self.datacard_name = datacard_name
        self.colour = colour
        self.position_in_stack = position_in_stack
        self.basedir = basedir
        self.postfix = postfix
        self.isdata = isdata
        self.ismc = ismc
        self.issignal = issignal
        self.weight = weight
        self.xs = xs
        self.nevents = 1.
        self.file = '/'.join([basedir, self.name, postfix])

        if not self.isdata:
            nevents_file = '/'.join(
                [basedir, self.name, 'SkimAnalyzerCount/SkimReport.txt'])
            with open(nevents_file) as ff:
                for line in ff:
                    if 'Sum Norm Weights' in line:
                        # Parse the line that matched, not a fixed line
                        # index, so the report layout may vary. Only the
                        # first run of digits is used.
                        self.nevents = float(re.findall(r'\d+', line)[0])
                        break

        rdf = RDF('tree', self.file)
        rdf = rdf.Filter(self.selection)
        # AsNumpy returns a dict of column arrays, which pandas accepts as is.
        self.df = pd.DataFrame(rdf.AsNumpy())
        # scale to 1/pb
        self.lumi_scaling = 1. if self.isdata else (self.xs / self.nevents)
Пример #8
0
    "AK15Jet_DeepAK15_TvsQCD",
]

# RDataFrame and Snapshot take the branch names as a C++ vector of strings.
branch_vec = ROOT.vector("string")()
for branch in branches:
    branch_vec.push_back(branch)

if options.is_dataframe:
    # Read the tree named "tree" from the first positional argument.
    print("Dataframe flag was set. Handling argument as dataframe.")
    input_file = args[0]
    data_frame = RDF("tree", input_file)
else:
    # Chain the "MVATree" of every positional argument.
    print(
        "No dataframe was given. Handling the arguments as trees and adding them to chain."
    )
    input_files = args
    input_chain = ROOT.TChain("MVATree")
    for input_file in input_files:
        input_chain.Add(input_file)
    print("Finished loading chain with ", input_chain.GetEntries(), " entries")
    data_frame = RDF(input_chain, branch_vec)

print("Finished converting the chain to RDataFrame")

# Only a frame built from a chain is written back to disk.
if not options.is_dataframe and options.save:
    output_name = data_mc_string + "_" + options.name + "_dataframe.root"
    print("saving dataframe to disk as ", output_name)
    data_frame.Snapshot("tree", output_name, branch_vec)
    print("saved dataframe to disk ...")
Пример #9
0
    # Pick the branch-name prefix (cname) for this collection by substring
    # match on coll[0]. 'fatjet' must be tested before 'jet' because the
    # latter is a substring of the former.
    # NOTE(review): varsuff is only assigned in the electron/muon branches;
    # the jet branches rely on a value set before this chunk -- confirm it is
    # reset for every iteration of the enclosing loop.
    if 'fatjet' in coll[0]:
        cname = 'AntiKt10TruthTrimmedPtFrac5SmallR20JetsAux'
    elif 'jet' in coll[0]:
        cname = 'AntiKt4TruthDressedWZJetsAux'
    elif 'electron' in coll[0]:
        cname = 'TruthElectronsAuxDyn'
        varsuff = '_dressed'
    elif 'muon' in coll[0]:
        cname = 'TruthMuonsAuxDyn'
        varsuff = '_dressed'
    else:
        # Unknown collection: report it and move on to the next one.
        print('collection {} not handled, please add'.format(coll))
        continue
    # The Define expressions below read element 0 of each branch.
    varsuff += '[0]'

    # eta: histogram model parameters come from models['eta']; the histogram
    # is filled from the newly defined column with 'wgt' as weight column.
    model = RDF.TH1DModel(cname + 'eta', models['eta'][0].format(coll[0]),
                          models['eta'][1], models['eta'][2], models['eta'][3])
    h = rdf.Define(cname+'_eta_0', cname+'.eta'+varsuff) \
           .Histo1D(model, cname+'_eta_0', 'wgt')
    h.Draw()
    canv.SaveAs(outdir + '/' + coll[0] + '_eta.pdf')
    # Clear the canvas before the next plot is drawn.
    canv.Clear('D')

    # phi: same procedure with the models['phi'] parameters.
    model = RDF.TH1DModel(cname + 'phi', models['phi'][0].format(coll[0]),
                          models['phi'][1], models['phi'][2], models['phi'][3])
    h = rdf.Define(cname+'_phi_0', cname+'.phi'+varsuff) \
           .Histo1D(model, cname+'_phi_0', 'wgt')
    h.Draw()
    canv.SaveAs(outdir + '/' + coll[0] + '_phi.pdf')
    canv.Clear('D')

    # Logarithmic y axis for whatever is drawn after this point (not visible
    # in this chunk).
    canv.SetLogy()
Пример #10
0
# Placeholder until one of the two branches below builds the RDataFrame.
data_frame = None

# Two input modes: with the dataframe flag the first argument is read
# directly as a file holding the tree options.treename; without it every
# argument is added to a chain of that tree.
if options.is_dataframe:
    print("Dataframe flag was set. Handling argument as dataframe.")
    input_file = args[0]
    data_frame = RDF(options.treename, input_file)
else:
    print(
        "No dataframe was given. Handling the arguments as trees and adding them to chain."
    )
    input_files = args
    input_chain = ROOT.TChain(options.treename)
    for input_file in input_files:
        input_chain.Add(input_file)
    print("Finished loading chain with ", input_chain.GetEntries(), " entries")
    data_frame = RDF(input_chain, branch_vec)

print("Finished creating the RDataFrame")

# Optionally write the chain-based frame to disk as a tree named "tree".
if not options.is_dataframe and options.save:
    output_name = data_mc_string + "_" + names + "_dataframe.root"
    print("saving dataframe to disk as ", output_name)
    data_frame.Snapshot("tree", output_name, branch_vec)
    print("saved dataframe to disk ...")
Пример #11
0
parser.add_argument("config", help="Path to the YAML configuration file")
args = parser.parse_args()

ROOT.ROOT.EnableImplicitMT()
gROOT.SetBatch()

# NOTE(review): yaml.full_load can construct more than plain data types; if
# the configuration can come from an untrusted source, consider
# yaml.safe_load instead.
with open(os.path.expandvars(args.config), 'r') as stream:
    try:
        params = yaml.full_load(stream)
    except yaml.YAMLError as exc:
        # Everything below needs `params`; stop here with the parser message
        # rather than failing later with an unrelated NameError.
        raise SystemExit('Cannot parse configuration file {}: {}'.format(
            args.config, exc))

# define paths for loading data and storing results
mc_path = os.path.expandvars(params['MC_PATH'])
data_path = os.path.expandvars(params['DATA_PATH'])
dataDF = RDF('DataTable', data_path)
mcDF = RDF('SignalTable', mc_path)
genDF = RDF('GenTable', mc_path)

# The results directory is taken from an environment variable whose name
# depends on the NBODY setting.
results_dir = os.environ['HYPERML_RESULTS_{}'.format(params['NBODY'])]
file_name = results_dir + '/' + params['FILE_PREFIX'] + '_std.root'
results_file = TFile(file_name, 'recreate')

for cclass in params['CENTRALITY_CLASS']:
    cent_dir = results_file.mkdir('{}-{}'.format(cclass[0], cclass[1]))
    # Same centrality window [cclass[0], cclass[1]) for all three tables.
    cent_cut = 'centrality >= {} && centrality < {}'.format(
        cclass[0], cclass[1])
    dataCentDF = dataDF.Filter(cent_cut)
    mcCentDF = mcDF.Filter(cent_cut)
    genCentDF = genDF.Filter(cent_cut)
Пример #12
0
import root_numpy as rnp
import uproot as ur
import pandas as pd
import numpy as np

# Input ntuples; each file is opened once with ROOT and once with uproot.
path_WJ = '/work/dezhu/4_production/production_20190411_Bkg_mmm/ntuples/WJetsToLNu/HNLTreeProducer/tree.root'
path_DY = '/work/dezhu/4_production/production_20190411_Bkg_mmm/ntuples/DYBB/HNLTreeProducer/tree.root'

# ROOT side: files, trees and the RDataFrames built on them.
tf_WJ = rt.TFile(path_WJ)
tf_DY = rt.TFile(path_DY)

t_WJ = tf_WJ.Get('tree')
t_DY = tf_DY.Get('tree')

rdf_WJ = RDF(t_WJ)
rdf_DY = RDF(t_DY)

# uproot side: the same files and trees.
uf_WJ = ur.open(path_WJ)
uf_DY = ur.open(path_DY)

ut_WJ = uf_WJ['tree']
ut_DY = uf_DY['tree']

# Selected columns of the WJets tree as a pandas DataFrame.
pdf_WJ_out = ut_WJ.pandas.df(
    ['event', 'lumi', 'run', 'l2_pt', 'l2_dxy', 'l2_eta'])
pdf_DY_out = ut_DY.pandas.df(