示例#1
0
 def __initializeSample(self,sample):
     """Select `sample` and build its chain and equivalent luminosity.

     Every ``*.root`` file found directly under
     ``<self.ntupleDirectory>/<sample>`` is added to a new ``ROOT.TChain``
     named ``self.treeName``.  The first bin of each file's
     ``summedWeights`` histogram is accumulated along the way.

     Sets ``self.sample``, ``self.sampleLumi`` (accumulated weights divided
     by ``getXsec(sample)``, or ``0.`` when that value is falsy) and
     ``self.sampleTree``.
     """
     self.sample = sample
     chain = ROOT.TChain(self.treeName)
     pattern = '{0}/{1}/*.root'.format(self.ntupleDirectory,sample)
     weightTotal = 0.
     for path in glob.glob(pattern):
         rootFile = ROOT.TFile.Open(path)
         # Read the bin content while the file is still open.
         weightHist = rootFile.Get("summedWeights")
         weightTotal += weightHist.GetBinContent(1)
         rootFile.Close()
         chain.Add(path)
     if getXsec(sample):
         self.sampleLumi = float(weightTotal)/getXsec(sample)
     else:
         self.sampleLumi = 0.
     self.sampleTree = chain
示例#2
0
 def __initializeSample(self, sample):
     """Switch to `sample`: chain its ROOT files and compute its luminosity.

     Files are matched with ``<self.ntupleDirectory>/<sample>/*.root``.
     For each one, bin 1 of the ``summedWeights`` histogram is added to a
     running total and the file is appended to a ``ROOT.TChain`` named
     ``self.treeName``.

     Attributes written: ``self.sample``, ``self.sampleLumi`` and
     ``self.sampleTree``.  ``self.sampleLumi`` is ``0.`` when
     ``getXsec(sample)`` is falsy.
     """
     self.sample = sample
     sampleChain = ROOT.TChain(self.treeName)
     directory = '{0}/{1}'.format(self.ntupleDirectory, sample)
     rootFiles = glob.glob('{0}/*.root'.format(directory))

     total = 0.
     for fileName in rootFiles:
         handle = ROOT.TFile.Open(fileName)
         total += handle.Get("summedWeights").GetBinContent(1)
         handle.Close()
         sampleChain.Add(fileName)

     lumi = 0.
     if getXsec(sample):
         lumi = float(total) / getXsec(sample)
     self.sampleLumi = lumi
     self.sampleTree = sampleChain
 def __initializeNtuple(self):
     """Build the sample chain and its normalisation from the input files.

     Input files are read from ``self.inputFileList`` (one path per line)
     when it is set; otherwise every ``*.root`` file directly inside
     ``self.ntupleDirectory`` is used.  Each file contributes the first bin
     of its ``summedWeights`` histogram to the total weight and is added to
     a ``ROOT.TChain`` named ``self.treeName``.

     Sets ``self.intLumi``, ``self.xsec``, ``self.sampleLumi`` (total
     weight divided by ``self.xsec``, or ``0.`` when the cross section is
     falsy), ``self.sampleTree``, ``self.files`` and ``self.initialized``.

     An error is logged when no files are found, and a warning when the
     total weight is zero for a sample that ``isData`` does not flag.
     """
     tchain = ROOT.TChain(self.treeName)
     if self.inputFileList:  # reading from a passed list of inputfiles
         with open(self.inputFileList, 'r') as f:
             # Skip blank lines: an empty path would otherwise be handed to
             # ROOT.TFile.Open below, which cannot open it.
             allFiles = [path for path in (line.strip() for line in f) if path]
     else:  # reading from an input directory (all files in directory will be processed)
         allFiles = glob.glob('{0}/*.root'.format(self.ntupleDirectory))
     if not allFiles:
         logging.error('No files found for sample {0}'.format(self.sample))
     summedWeights = 0.
     for f in allFiles:
         tfile = ROOT.TFile.Open(f)
         summedWeights += tfile.Get("summedWeights").GetBinContent(1)
         tfile.Close()
         tchain.Add(f)
     if not summedWeights and not isData(self.sample):
         logging.warning('No events for sample {0}'.format(self.sample))
     self.intLumi = float(getLumi())
     self.xsec = getXsec(self.sample)
     self.sampleLumi = float(summedWeights) / self.xsec if self.xsec else 0.
     self.sampleTree = tchain
     self.files = allFiles
     self.initialized = True
     logging.debug(
         'Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'
         .format(self.sample, summedWeights, self.xsec, self.sampleLumi,
                 self.intLumi))
 def __initializeNtuple(self):
     """Build the sample chain and its normalisation from the input files.

     Input files are read from ``self.inputFileList`` (one path per line)
     when it is set.  Otherwise ``self.ntupleDirectory`` is walked
     recursively and every ``*.root`` file is collected, skipping any
     directory whose path contains the substring ``'failed'``.

     Each file contributes the first bin of its ``summedWeights`` histogram
     to the total weight and is added to a ``ROOT.TChain`` named
     ``self.treeName``.

     Sets ``self.xsec``, ``self.sampleLumi`` (total weight divided by
     ``self.xsec``, or ``0.`` when the cross section is falsy),
     ``self.sampleTree``, ``self.files`` and ``self.initialized``.
     ``self.intLumi`` is only read here (for the debug message), so it must
     already be set by the time this method runs.
     """
     tchain = ROOT.TChain(self.treeName)
     if self.inputFileList: # reading from a passed list of inputfiles
         with open(self.inputFileList,'r') as f:
             # Skip blank lines: an empty path would otherwise be handed to
             # ROOT.TFile.Open below, which cannot open it.
             allFiles = [path for path in (line.strip() for line in f) if path]
     else: # reading from an input directory (all files below it will be processed)
         allFiles = []
         for root, dirnames, fnames in os.walk(self.ntupleDirectory):
             if 'failed' in root: continue
             allFiles.extend(os.path.join(root,fname) for fname in fnmatch.filter(fnames, '*.root'))
     if not allFiles: logging.error('No files found for sample {0}'.format(self.sample))
     summedWeights = 0.
     for f in allFiles:
         tfile = ROOT.TFile.Open(f)
         summedWeights += tfile.Get("summedWeights").GetBinContent(1)
         tfile.Close()
         tchain.Add(f)
     if not summedWeights and not isData(self.sample): logging.warning('No events for sample {0}'.format(self.sample))
     self.xsec = getXsec(self.sample)
     self.sampleLumi = float(summedWeights)/self.xsec if self.xsec else 0.
     self.sampleTree = tchain
     self.files = allFiles
     self.initialized = True
     logging.debug('Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'.format(self.sample,summedWeights,self.xsec,self.sampleLumi,self.intLumi))
 def __initializeNtuple(self):
     """Build the sample chain, its normalisation and its file hash.

     Input files are read from ``self.inputFileList`` (one path per line)
     when it is set; otherwise every ``*.root`` file directly inside
     ``self.ntupleDirectory`` is used.  Each file contributes the first bin
     of its ``summedWeights`` histogram to the total weight and is added to
     a ``ROOT.TChain`` named ``self.treeName``.

     Sets ``self.xsec``, ``self.sampleLumi`` (total weight divided by
     ``self.xsec``, or ``0.`` when the cross section is falsy),
     ``self.sampleTree``, ``self.files`` and ``self.initialized``, and
     increments ``self.j``.  Unless ``self.temp`` is truthy,
     ``self.fileHash`` is set to ``hashFile(*self.files)``.  When
     ``self.useProof`` is truthy, ``SetProof()`` is called on the chain.
     ``self.intLumi`` is only read here (for the debug message), so it must
     already be set by the time this method runs.
     """
     tchain = ROOT.TChain(self.treeName)
     if self.inputFileList: # reading from a passed list of inputfiles
         with open(self.inputFileList,'r') as f:
             # Skip blank lines: an empty path would otherwise be handed to
             # ROOT.TFile.Open below, which cannot open it.
             allFiles = [path for path in (line.strip() for line in f) if path]
     else: # reading from an input directory (all files in directory will be processed)
         allFiles = glob.glob('{0}/*.root'.format(self.ntupleDirectory))
     #elif os.path.isfile(self.ntuple): # reading a single root file
     #    allFiles = [self.ntuple]
     if not allFiles: logging.error('No files found for sample {0}'.format(self.sample))
     summedWeights = 0.
     for f in allFiles:
         tfile = ROOT.TFile.Open(f)
         summedWeights += tfile.Get("summedWeights").GetBinContent(1)
         tfile.Close()
         tchain.Add(f)
     if not summedWeights and not isData(self.sample): logging.warning('No events for sample {0}'.format(self.sample))
     self.xsec = getXsec(self.sample)
     if not self.xsec: logging.error('No xsec for sample {0}'.format(self.sample))
     self.sampleLumi = float(summedWeights)/self.xsec if self.xsec else 0.
     self.sampleTree = tchain
     # Counter referenced by the (currently disabled) entry-list naming below.
     self.j += 1
     #listname = 'selList{0}'.format(self.j)
     #self.sampleTree.Draw('>>{0}'.format(listname),'1','entrylist')
     #skim = ROOT.gDirectory.Get(listname)
     #self.entryListMap['1'] = skim
     self.files = allFiles
     self.initialized = True
     if not self.temp: self.fileHash = hashFile(*self.files)
     if self.useProof: self.sampleTree.SetProof()
     logging.debug('Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'.format(self.sample,summedWeights,self.xsec,self.sampleLumi,self.intLumi))
示例#6
0
def main(argv=None):
    """Print a table of samples and their cross sections.

    `argv` defaults to ``sys.argv[1:]`` and is parsed with
    ``parse_command_line``.  When both ``args.verbose`` and ``args.analysis``
    are set, each sample's files are chained and the table additionally
    reports the selected entries, the fraction of negative-weight entries,
    the sample luminosity and the effective number of entries.
    """
    if argv is None:
        argv = sys.argv[1:]

    args = parse_command_line(argv)
    detailed = args.verbose and args.analysis

    if detailed:
        columns = ['Sample','xsec [pb]','entries','ratio neg.','lumi. [/pb]','eff. entries']
    else:
        columns = ['Sample','xsec [pb]']
    table = PrettyTable(columns)
    table.align = 'r'
    table.align['Sample'] = 'l'

    if detailed:
        ntupleDir = getAnalysisNtupleDirectory(args.analysis,True)
    else:
        ntupleDir = getNtupleDirectory(version=args.version)

    # NOTE: samples are discovered with a local glob, while their files are
    # listed by get_hdfs_root_files and opened under the '/hdfs' prefix.
    samplePaths = glob.glob('/'.join([ntupleDir,'*']))
    for sample in samplePaths:
        name = os.path.basename(sample)
        logging.info('Processing {0}'.format(name))
        data = isData(name)
        xsec = getXsec(name)

        if not detailed:
            table.add_row([name,xsec])
            continue

        print(sample)
        fnames = get_hdfs_root_files(sample)

        # get total events, total weights
        tree = ROOT.TChain(getTreeName(args.analysis))
        summedWeights = 0.
        for f in fnames:
            hdfsPath = '/hdfs'+f
            tfile = ROOT.TFile.Open(hdfsPath)
            summedWeights += tfile.Get("summedWeights").GetBinContent(1)
            tfile.Close()
            tree.Add(hdfsPath)

        numEntries = tree.GetEntries(args.selection)
        weightedEntries = 0.
        negevents = 0.
        seltree = tree.CopyTree(args.selection)
        for row in seltree:
            if data:
                # data entries are counted with unit weight
                weightedEntries += 1.
                continue
            genWeight = row.genWeight
            weightedEntries += genWeight
            if genWeight<0.:
                negevents += 1

        if data:
            sampleLumi = getLumi()
        elif xsec:
            sampleLumi = float(summedWeights)/xsec
        else:
            sampleLumi = 0.

        negratio = 0.
        if numEntries:
            negratio = float(negevents)/numEntries
        effevents = 0.
        if sampleLumi:
            effevents = weightedEntries*getLumi()/sampleLumi

        table.add_row([
            name,
            '{0:.6f}'.format(float(xsec)),
            numEntries,
            '{0:.3}'.format(float(negratio)),
            '{0:.6f}'.format(float(sampleLumi)),
            '{0:.3f}'.format(float(effevents)),
        ])

    print(table.get_string())
示例#7
0
def main(argv=None):
    """Print a table of samples, sorted by path, with their cross sections.

    `argv` defaults to ``sys.argv[1:]`` and is parsed with
    ``parse_command_line``.  With both ``args.verbose`` and ``args.analysis``
    set, the table gains per-sample columns for the selected entries, the
    negative-weight fraction, the sample luminosity and the effective
    number of entries.
    """
    if argv is None:
        argv = sys.argv[1:]

    args = parse_command_line(argv)
    fullReport = args.verbose and args.analysis

    header = ['Sample','xsec [pb]']
    if fullReport:
        header = ['Sample','xsec [pb]','entries','ratio neg.','lumi. [/pb]','eff. entries']
    table = PrettyTable(header)
    table.align = 'r'
    table.align['Sample'] = 'l'

    if fullReport:
        ntupleDir = getAnalysisNtupleDirectory(args.analysis)
    else:
        ntupleDir = getNtupleDirectory(version=args.version)

    for sample in sorted(glob.glob(os.path.join(ntupleDir,'*'))):
        name = os.path.basename(sample)
        logging.info('Processing {0}'.format(name))
        data = isData(name)
        xsec = getXsec(name)

        if not fullReport:
            table.add_row([name,xsec])
            continue

        # get total events, total weights
        tree = ROOT.TChain(getTreeName(args.analysis))
        summedWeights = 0.
        for f in get_hdfs_root_files(sample):
            location = '/hdfs'+f
            tfile = ROOT.TFile.Open(location)
            summedWeights += tfile.Get("summedWeights").GetBinContent(1)
            tfile.Close()
            tree.Add(location)

        numEntries = tree.GetEntries(args.selection)
        weightedEntries = 0.
        negevents = 0.
        seltree = tree.CopyTree(args.selection)
        for row in seltree:
            if not data:
                rowWeight = row.genWeight
                weightedEntries += rowWeight
                if rowWeight<0.:
                    negevents += 1
            else:
                # data entries are counted with unit weight
                weightedEntries += 1.

        if data:
            sampleLumi = getLumi()
        elif xsec:
            sampleLumi = float(summedWeights)/xsec
        else:
            sampleLumi = 0.

        if numEntries:
            negratio = float(negevents)/numEntries
        else:
            negratio = 0.
        if sampleLumi:
            effevents = weightedEntries*getLumi()/sampleLumi
        else:
            effevents = 0.

        row_cells = [name]
        row_cells.append('{0:.6f}'.format(float(xsec)))
        row_cells.append(numEntries)
        row_cells.append('{0:.3}'.format(float(negratio)))
        row_cells.append('{0:.6f}'.format(float(sampleLumi)))
        row_cells.append('{0:.3f}'.format(float(effevents)))
        table.add_row(row_cells)

    print(table.get_string())
示例#8
0
 def __initializeNtuple(self):
     """Build the sample chain, its normalisation and its file hash.

     Input files are read from ``self.inputFileList`` (one path per line)
     when it is set; otherwise every ``*.root`` file directly inside
     ``self.ntupleDirectory`` is used.  Each file contributes the first bin
     of its ``summedWeights`` histogram to the total weight and is added to
     a ``ROOT.TChain`` named ``self.treeName``.

     Sets ``self.intLumi``, ``self.xsec``, ``self.sampleLumi`` (total
     weight divided by ``self.xsec``, or ``0.`` when the cross section is
     falsy), ``self.sampleTree``, ``self.files`` and ``self.initialized``,
     and increments ``self.j``.  Unless ``self.temp`` is truthy,
     ``self.fileHash`` is set to ``hashFile(*self.files)``.  When
     ``self.useProof`` is truthy, ``SetProof()`` is called on the chain.
     """
     tchain = ROOT.TChain(self.treeName)
     if self.inputFileList:  # reading from a passed list of inputfiles
         with open(self.inputFileList, 'r') as f:
             # Skip blank lines: an empty path would otherwise be handed to
             # ROOT.TFile.Open below, which cannot open it.
             allFiles = [path for path in (line.strip() for line in f) if path]
     else:  # reading from an input directory (all files in directory will be processed)
         allFiles = glob.glob('{0}/*.root'.format(self.ntupleDirectory))
     #elif os.path.isfile(self.ntuple): # reading a single root file
     #    allFiles = [self.ntuple]
     if not allFiles:
         logging.error('No files found for sample {0}'.format(self.sample))
     summedWeights = 0.
     for f in allFiles:
         tfile = ROOT.TFile.Open(f)
         summedWeights += tfile.Get("summedWeights").GetBinContent(1)
         tfile.Close()
         tchain.Add(f)
     if not summedWeights and not isData(self.sample):
         logging.warning('No events for sample {0}'.format(self.sample))
     self.intLumi = float(getLumi())
     self.xsec = getXsec(self.sample)
     if not self.xsec:
         logging.error('No xsec for sample {0}'.format(self.sample))
     self.sampleLumi = float(summedWeights) / self.xsec if self.xsec else 0.
     self.sampleTree = tchain
     # Counter referenced by the (currently disabled) entry-list naming below.
     self.j += 1
     #listname = 'selList{0}'.format(self.j)
     #self.sampleTree.Draw('>>{0}'.format(listname),'1','entrylist')
     #skim = ROOT.gDirectory.Get(listname)
     #self.entryListMap['1'] = skim
     self.files = allFiles
     self.initialized = True
     if not self.temp: self.fileHash = hashFile(*self.files)
     if self.useProof: self.sampleTree.SetProof()
     logging.debug(
         'Initialized {0}: summedWeights = {1}; xsec = {2}; sampleLumi = {3}; intLumi = {4}'
         .format(self.sample, summedWeights, self.xsec, self.sampleLumi,
                 self.intLumi))
示例#9
0
    def __init__(self,**kwargs):
        """Configure the TMVA factory for the WZ signal/background BDT.

        Keyword Args:
            inputTreeName: name of the tree chained from every ntuple file
                (default ``'WZTree'``).  It is removed from `kwargs` before
                the remaining keyword arguments are forwarded to the parent
                constructor.

        For every entry of the sample map, the first bin of the
        ``summedWeights`` histogram is summed over the sample's files and
        divided by ``getXsec`` of the sample name.  Signal and background
        chains are then registered on ``self.factory`` with the weight
        ``getLumi()`` divided by that per-sample value.  Finally the weight
        expression, input variables, preselection cut and the BDT method
        are booked.

        NOTE: the divisions below are unguarded, so a zero cross section or
        a used sample without any files raises ``ZeroDivisionError``.
        """
        inputTreeName = kwargs.pop('inputTreeName','WZTree')
        super(WZTrainer,self).__init__(**kwargs)

        sampleDir = 'ntuples/WZ'
        sampleMap = {
            "dy10"     : "DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
            "dy50"     : "DYJetsToLL_M-50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
            "ggzz2e2m" : "GluGluToContinToZZTo2e2mu_13TeV_MCFM701_pythia8",
            "ggzz2e2t" : "GluGluToContinToZZTo2e2tau_13TeV_MCFM701_pythia8",
            "ggzz2m2t" : "GluGluToContinToZZTo2mu2tau_13TeV_MCFM701_pythia8",
            "ggzz4e"   : "GluGluToContinToZZTo4e_13TeV_MCFM701_pythia8",
            "ggzz4m"   : "GluGluToContinToZZTo4mu_13TeV_MCFM701_pythia8",
            "ggzz4t"   : "GluGluToContinToZZTo4tau_13TeV_MCFM701_pythia8",
            "tt"       : "TTJets_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
            "ttw"      : "TTWJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8",
            "w"        : "WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
            "ww"       : "WWTo2L2Nu_13TeV-powheg",
            "wz3lnu"   : "WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8",
            "wz2l2q"   : "WZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8",
            "wzz"      : "WZZ_TuneCUETP8M1_13TeV-amcatnlo-pythia8",
            "zg"       : "ZGTo2LG_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
            "zz2l2n"   : "ZZTo2L2Nu_13TeV_powheg_pythia8",
            "zz2l2q"   : "ZZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8",
            "zz4l"     : "ZZTo4L_13TeV_powheg_pythia8",
            #"tzq"      : "tZq_ll_4f_13TeV-amcatnlo-pythia8_TuneCUETP8M1",
        }

        def sampleFiles(key):
            # All ROOT files of the sample registered under the short name `key`.
            return glob.glob('{0}/{1}/*.root'.format(sampleDir, sampleMap[key]))

        def buildChain(key):
            # Chain the requested input tree over every file of the sample.
            chain = ROOT.TChain(inputTreeName)
            for f in sampleFiles(key):
                chain.Add(f)
            return chain

        # get the trees
        intLumis = {}
        for s in sampleMap:
            summedWeights = 0.
            for f in sampleFiles(s):
                tfile = ROOT.TFile.Open(f)
                hist = tfile.Get('summedWeights')
                summedWeights += hist.GetBinContent(1)
                tfile.Close()
            intLumis[s] = float(summedWeights)/getXsec(sampleMap[s])
        sigTrees = {}
        for sig in ['wz3lnu']:
            sigTrees[sig] = buildChain(sig)
        bgTrees = {}
        for bg in ['dy10','dy50','ggzz2e2m','ggzz2m2t','ggzz4e','ggzz4m','ggzz4t','tt','ttw','wzz','zg','zz2l2n','zz2l2q','zz4l']:
            bgTrees[bg] = buildChain(bg)

        intLumi = getLumi()

        # add to factory
        for sig in sigTrees:
            self.factory.AddSignalTree(sigTrees[sig],intLumi/intLumis[sig])
        for bg in bgTrees:
            self.factory.AddBackgroundTree(bgTrees[bg],intLumi/intLumis[bg])

        # per event weight
        self.factory.SetWeightExpression('genWeight')

        # variables
        self.factory.AddVariable('z1_pt','F')
        self.factory.AddVariable('z2_pt','F')
        self.factory.AddVariable('w1_pt','F')
        self.factory.AddVariable('z_mass','F')
        self.factory.AddVariable('met_pt','F')
        self.factory.AddVariable('numBjetsTight30','I')

        # preselection cut
        passCut = ROOT.TCut('z1_passMedium==1 && z2_passMedium==1 && w1_passTight==1')
        self.factory.PrepareTrainingAndTestTree(
            passCut,
            ":".join(
                [
                "nTrain_Signal=0",
                "nTrain_Background=0",
                "SplitMode=Random",
                "NormMode=NumEvents",
                "!V"
                ]
            )
        )

        # options:
        # H : display help
        # V : turn on verbosity
        # IgnoreNegWeightsInTraining : ignore events with negative weights for training, keep for testing

        # book method
        self.factory.BookMethod(
            ROOT.TMVA.Types.kBDT,
            "BDT",
            ":".join(
                [
                    "NTrees=850",
                    "MaxDepth=3",
                    "BoostType=AdaBoost",
                    "AdaBoostBeta=0.5",
                    "SeparationType=GiniIndex",
                    "nCuts=20",
                    "PruneMethod=NoPruning",
                ]
            )
        )
示例#10
0
    def __init__(self, **kwargs):
        """Set up the TMVA factory for training the WZ BDT.

        Keyword Args:
            inputTreeName: name of the tree read from each ntuple file
                (default ``'WZTree'``).  It is popped from `kwargs`; the
                remaining keyword arguments go to the parent constructor.

        The method sums bin 1 of every file's ``summedWeights`` histogram
        per sample and divides the sum by ``getXsec`` of the sample name.
        It then adds one signal chain and the listed background chains to
        ``self.factory``, each weighted by ``getLumi()`` over that
        per-sample value, and books the weight expression, variables,
        preselection cut and BDT method.

        NOTE: the divisions are unguarded, so a zero cross section or a
        used sample without files raises ``ZeroDivisionError``.
        """
        inputTreeName = kwargs.pop('inputTreeName', 'WZTree')
        super(WZTrainer, self).__init__(**kwargs)

        sampleDir = 'ntuples/WZ'
        sampleMap = {
            "dy10":
            "DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
            "dy50": "DYJetsToLL_M-50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
            "ggzz2e2m": "GluGluToContinToZZTo2e2mu_13TeV_MCFM701_pythia8",
            "ggzz2e2t": "GluGluToContinToZZTo2e2tau_13TeV_MCFM701_pythia8",
            "ggzz2m2t": "GluGluToContinToZZTo2mu2tau_13TeV_MCFM701_pythia8",
            "ggzz4e": "GluGluToContinToZZTo4e_13TeV_MCFM701_pythia8",
            "ggzz4m": "GluGluToContinToZZTo4mu_13TeV_MCFM701_pythia8",
            "ggzz4t": "GluGluToContinToZZTo4tau_13TeV_MCFM701_pythia8",
            "tt": "TTJets_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
            "ttw":
            "TTWJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-madspin-pythia8",
            "w": "WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
            "ww": "WWTo2L2Nu_13TeV-powheg",
            "wz3lnu": "WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8",
            "wz2l2q": "WZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8",
            "wzz": "WZZ_TuneCUETP8M1_13TeV-amcatnlo-pythia8",
            "zg": "ZGTo2LG_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8",
            "zz2l2n": "ZZTo2L2Nu_13TeV_powheg_pythia8",
            "zz2l2q": "ZZTo2L2Q_13TeV_amcatnloFXFX_madspin_pythia8",
            "zz4l": "ZZTo4L_13TeV_powheg_pythia8",
            #"tzq"      : "tZq_ll_4f_13TeV-amcatnlo-pythia8_TuneCUETP8M1",
        }
        filePattern = '{0}/{1}/*.root'

        # get the trees
        intLumis = {}
        for s in sampleMap:
            summedWeights = 0.
            for f in glob.glob(filePattern.format(sampleDir, sampleMap[s])):
                tfile = ROOT.TFile.Open(f)
                hist = tfile.Get('summedWeights')
                summedWeights += hist.GetBinContent(1)
                tfile.Close()
            intLumis[s] = float(summedWeights) / getXsec(sampleMap[s])

        signalNames = ['wz3lnu']
        backgroundNames = [
            'dy10', 'dy50', 'ggzz2e2m', 'ggzz2m2t', 'ggzz4e', 'ggzz4m',
            'ggzz4t', 'tt', 'ttw', 'wzz', 'zg', 'zz2l2n', 'zz2l2q', 'zz4l'
        ]

        # Chains use the requested input tree name instead of a fixed one.
        sigTrees = {}
        for sig in signalNames:
            sigTrees[sig] = ROOT.TChain(inputTreeName)
            for f in glob.glob(filePattern.format(sampleDir, sampleMap[sig])):
                sigTrees[sig].Add(f)
        bgTrees = {}
        for bg in backgroundNames:
            bgTrees[bg] = ROOT.TChain(inputTreeName)
            for f in glob.glob(filePattern.format(sampleDir, sampleMap[bg])):
                bgTrees[bg].Add(f)

        intLumi = getLumi()

        # add to factory
        for sig in sigTrees:
            self.factory.AddSignalTree(sigTrees[sig], intLumi / intLumis[sig])
        for bg in bgTrees:
            self.factory.AddBackgroundTree(bgTrees[bg], intLumi / intLumis[bg])

        # per event weight
        self.factory.SetWeightExpression('genWeight')

        # variables
        self.factory.AddVariable('z1_pt', 'F')
        self.factory.AddVariable('z2_pt', 'F')
        self.factory.AddVariable('w1_pt', 'F')
        self.factory.AddVariable('z_mass', 'F')
        self.factory.AddVariable('met_pt', 'F')
        self.factory.AddVariable('numBjetsTight30', 'I')

        # preselection cut
        passCut = ROOT.TCut(
            'z1_passMedium==1 && z2_passMedium==1 && w1_passTight==1')
        self.factory.PrepareTrainingAndTestTree(
            passCut, ":".join([
                "nTrain_Signal=0", "nTrain_Background=0", "SplitMode=Random",
                "NormMode=NumEvents", "!V"
            ]))

        # options:
        # H : display help
        # V : turn on verbosity
        # IgnoreNegWeightsInTraining : ignore events with negative weights for training, keep for testing

        # book method
        self.factory.BookMethod(
            ROOT.TMVA.Types.kBDT, "BDT", ":".join([
                "NTrees=850",
                "MaxDepth=3",
                "BoostType=AdaBoost",
                "AdaBoostBeta=0.5",
                "SeparationType=GiniIndex",
                "nCuts=20",
                "PruneMethod=NoPruning",
            ]))