def __init__(self,**kwargs):
        """
        Resolve the input files, tally lumi information and book the output tree.

        Keyword arguments (all optional, consumed from kwargs with pop):
            inputFileNames     -- a list (or cms.untracked.vstring) of file names,
                                  or a string naming either a root file or a text
                                  file listing one input file per line.
                                  Ignored when a 'PSet.py' file exists in the
                                  working directory (default: []).
            inputTreeDirectory -- directory inside the input files holding the
                                  trees (default: 'miniTree').
            inputTreeName      -- name of the event tree (default: 'MiniTree').
            inputLumiName      -- name of the lumi tree (default: 'LumiTree').
            outputFileName     -- output file name, either a string or an object
                                  exposing value() (default: 'analysisTree.root').
            outputTreeName     -- name of the output tree (default: 'AnalysisTree').
            shift              -- stored as self.shift (default: '').
            progressbar        -- progress bar instance, only read when the
                                  module-level hasProgress flag is set.

        Raises:
            Exception -- when no input files could be resolved.
        """
        inputFileNames = kwargs.pop('inputFileNames',[])
        inputTreeDirectory = kwargs.pop('inputTreeDirectory','miniTree')
        inputTreeName = kwargs.pop('inputTreeName','MiniTree')
        # Previously this popped 'inputTreeName' a second time, so the lumi tree
        # name could never be overridden; the default is unchanged.
        inputLumiName = kwargs.pop('inputLumiName','LumiTree')
        outputFileName = kwargs.pop('outputFileName','analysisTree.root')
        outputTreeName = kwargs.pop('outputTreeName','AnalysisTree')
        self.shift = kwargs.pop('shift','')
        self.outputTreeName = outputTreeName
        if hasProgress:
            self.pbar = kwargs.pop(
                'progressbar',
                ProgressBar(widgets=['{0}: '.format(outputTreeName),' ',SimpleProgress(),' events ',Percentage(),' ',Bar(),' ',ETA()]),
            )
        # preselection: keep a value already present on the instance/class
        if not hasattr(self,'preselection'): self.preselection = '1'
        # input files
        self.fileNames = []
        if os.path.isfile('PSet.py'):                # grab input files from crab pset
            import PSet
            self.fileNames = list(PSet.process.source.fileNames)
        elif isinstance(inputFileNames, basestring): # inputFiles is a file name
            if os.path.isfile(inputFileNames):       # single file
                if inputFileNames[-4:] == 'root':    # file is a root file
                    self.fileNames.append(inputFileNames)
                else:                                # file is list of files
                    with open(inputFileNames,'r') as listFile:
                        # blank lines would otherwise become empty file names
                        self.fileNames.extend(line.strip() for line in listFile if line.strip())
        else:
            self.fileNames = inputFileNames          # already a python list or a cms.untracked.vstring()
        # Fail early: everything below needs at least one file (self.version and
        # the last file name are both derived inside the file loop).
        if not len(self.fileNames):
            raise Exception('No input files to process')
        if not isinstance(outputFileName, basestring): # its a cms.string(), get value
            outputFileName = outputFileName.value()
        # hdfs access is disabled; '/store' paths are always read through xrootd
        self.hasHDFS = False
        # input tree paths
        self.treename = '{0}/{1}'.format(inputTreeDirectory,inputTreeName)
        luminame = '{0}/{1}'.format(inputTreeDirectory,inputLumiName)
        self.totalEntries = 0
        self.numLumis = 0
        self.numEvents = 0
        self.summedWeights = 0
        logging.info('Getting Lumi information')
        for fName in self.fileNames:
            if fName.startswith('/store'): fName = '{0}/{1}'.format('/hdfs' if self.hasHDFS else 'root://cmsxrootd.hep.wisc.edu/',fName)
            tfile = ROOT.TFile.Open(fName)
            tree = tfile.Get(self.treename)
            self.totalEntries += tree.GetEntries()
            # the version is taken from the first file only, unless already set
            if not hasattr(self,'version'):
                # NOTE(review): this loads entry index 1, not 0 - confirm that is
                # intended for trees holding a single entry.
                tree.GetEntry(1)
                if hasattr(tree,'provenance'):
                    ver = tree.provenance[0].split('_')
                    self.version = ''.join([ver[1],ver[2],'X'])
                else:
                    self.version = getCMSSWVersion()
            lumitree = tfile.Get(luminame)
            for entry in lumitree:
                self.numLumis += 1
                self.numEvents += lumitree.nevents
                self.summedWeights += lumitree.summedWeights
            tfile.Close('R')
        logging.info('Analysis is running with version {0}'.format(self.version))
        logging.info("Will process {0} lumi sections with {1} events ({2}).".format(self.numLumis,self.numEvents,self.summedWeights))
        self.flush()
        # other input files
        self.pileupWeights = PileupWeights(self.version)
        self.fakeRates = FakeRates(self.version)
        self.leptonScales = LeptonScales(self.version)
        self.triggerScales = TriggerScales(self.version)
        self.triggerPrescales = TriggerPrescales(self.version)
        self.zptGenWeight = ZptGenWeight(self.version)
        self.zzGenWeight = ZZGenWeight(self.version)
        # tfile
        self.outfile = ROOT.TFile(outputFileName,"recreate")
        # cut tree
        self.cutTree = CutTree()
        # analysis tree
        self.tree = AnalysisTree(outputTreeName)
        self.eventsStored = 0

        # some things we always need:

        dysamples = [
            'DY1JetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8',
            'DY2JetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8',
            'DY3JetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8',
            'DY4JetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8',
            'DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8',
            'DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8',
        ]

        qqzzsamples = [
            'ZZTo4L_13TeV_powheg_pythia8',
            'ZZTo4L_13TeV-amcatnloFXFX-pythia8',
        ]

        # pileup
        self.tree.add(lambda cands: self.pileupWeights.weight(self.event)[0], 'pileupWeight', 'F')
        self.tree.add(lambda cands: self.pileupWeights.weight(self.event)[1], 'pileupWeightUp', 'F')
        self.tree.add(lambda cands: self.pileupWeights.weight(self.event)[2], 'pileupWeightDown', 'F')
        self.tree.add(lambda cands: self.event.vertices_count(), 'numVertices', 'I')
        self.tree.add(lambda cands: self.event.rho(), 'rho', 'F')

        # gen
        self.tree.add(lambda cands: self.event.nTrueVertices(), 'numTrueVertices', 'I')
        self.tree.add(lambda cands: self.event.NUP(), 'NUP', 'I')
        self.tree.add(lambda cands: self.event.isData(), 'isData', 'I')
        self.tree.add(lambda cands: self.event.genWeight(), 'genWeight', 'F')
        # fName is the name of the last file opened in the loop above; the
        # sample-specific branches are booked based on that name only.
        if any(x in fName for x in dysamples):
            self.tree.add(lambda cands: self.zptGenWeight.weight(self.gen), 'zPtWeight', 'F')
        if any(x in fName for x in qqzzsamples):
            self.tree.add(lambda cands: self.zzGenWeight.weight(self.gen), 'qqZZkfactor', 'F')
        self.tree.add(lambda cands: self.event.numGenJets(), 'numGenJets', 'I')
        self.tree.add(lambda cands: self.event.genHT(), 'genHT', 'I')
        # scale shifts: position in genWeights() -> (muR, muF) used in the branch name
        weightMap = {
            0: {'muR':1.0, 'muF':1.0},
            1: {'muR':1.0, 'muF':2.0},
            2: {'muR':1.0, 'muF':0.5},
            3: {'muR':2.0, 'muF':1.0},
            4: {'muR':2.0, 'muF':2.0},
            5: {'muR':2.0, 'muF':0.5},
            6: {'muR':0.5, 'muF':1.0},
            7: {'muR':0.5, 'muF':2.0},
            8: {'muR':0.5, 'muF':0.5},
        }

        def scaleWeight(index):
            # A factory gives every branch its own bound index; a lambda written
            # directly in the loop would see only the final loop value.
            return lambda cands: 0. if self.event.isData() else self.event.genWeights()[index] if len(self.event.genWeights())>index else 0.

        # sorted() keeps the branches in index order regardless of dict ordering
        for index in sorted(weightMap):
            self.tree.add(scaleWeight(index), 'genWeight_muR{muR:3.1f}_muF{muF:3.1f}'.format(**weightMap[index]), 'F')
示例#2
0
 def __init__(self, **kwargs):
     """
     Resolve the input files and prepare an empty clone of the input tree.

     Only the first input file is processed; the others are still opened
     once to count entries.

     Keyword arguments (all optional, consumed from kwargs with pop):
         inputFileNames     -- a list (or cms.untracked.vstring) of file names,
                               or a string naming either a root file or a text
                               file listing one input file per line.
                               Ignored when a 'PSet.py' file exists in the
                               working directory (default: []).
         inputTreeDirectory -- directory inside the input file holding the
                               tree; when empty the tree is looked up by
                               name alone (default: '').
         inputTreeName      -- name of the input tree (default: 'AnalysisTree').
         outputFileName     -- output file name, either a string or an object
                               exposing value() (default: 'analysisTree.root').
         outputTreeName     -- stored as self.outputTreeName and used as the
                               progress bar label (default: 'AnalysisTree').
         progressbar        -- progress bar instance, only read when the
                               module-level hasProgress flag is set.

     Raises:
         Exception -- when no input files could be resolved.
     """
     inputFileNames = kwargs.pop('inputFileNames', [])
     inputTreeDirectory = kwargs.pop('inputTreeDirectory', '')
     inputTreeName = kwargs.pop('inputTreeName', 'AnalysisTree')
     outputFileName = kwargs.pop('outputFileName', 'analysisTree.root')
     outputTreeName = kwargs.pop('outputTreeName', 'AnalysisTree')
     self.outputTreeName = outputTreeName
     if hasProgress:
         self.pbar = kwargs.pop(
             'progressbar',
             ProgressBar(widgets=[
                 '{0}: '.format(outputTreeName), ' ',
                 SimpleProgress(), ' events ',
                 Percentage(), ' ',
                 Bar(), ' ',
                 ETA()
             ]))
     # input files
     self.fileNames = []
     if os.path.isfile('PSet.py'):  # grab input files from crab pset
         import PSet
         self.fileNames = list(PSet.process.source.fileNames)
     elif isinstance(inputFileNames,
                     basestring):  # inputFiles is a file name
         if os.path.isfile(inputFileNames):  # single file
             if inputFileNames[-4:] == 'root':  # file is a root file
                 self.fileNames += [inputFileNames]
             else:  # file is list of files
                 with open(inputFileNames, 'r') as f:
                     for line in f:
                         self.fileNames += [line.strip()]
     else:
         self.fileNames = inputFileNames  # already a python list or a cms.untracked.vstring()
     if not isinstance(outputFileName,
                       basestring):  # its a cms.string(), get value
         outputFileName = outputFileName.value()
     # hdfs access is disabled; '/store' paths are always read through xrootd
     self.hasHDFS = False

     def resolveFileName(name):
         # Prefix '/store' paths with the hdfs mount or the xrootd redirector.
         if name.startswith('/store'):
             return '{0}/{1}'.format(
                 '/hdfs' if self.hasHDFS else
                 'root://cmsxrootd.hep.wisc.edu/', name)
         return name

     # input tree path
     self.treename = '{0}/{1}'.format(
         inputTreeDirectory,
         inputTreeName) if inputTreeDirectory else inputTreeName
     self.totalEntries = 0
     self.numLumis = 0
     self.numEvents = 0
     self.summedWeights = 0
     logging.info('Getting information')
     if len(self.fileNames) == 0:
         logging.warning('No files to process')
         # Fail here rather than later: self.version and the first file name
         # used below only exist when there is at least one input file.
         raise Exception('No input files to process')
     if len(self.fileNames) > 1:
         logging.warning(
             'More than one file requested, only processing the first file')
     # NOTE(review): totalEntries is summed over every file although only the
     # first one is processed below - confirm this is the intended total.
     for fName in self.fileNames:
         fName = resolveFileName(fName)
         tfile = ROOT.TFile.Open(fName)
         tree = tfile.Get(self.treename)
         self.totalEntries += tree.GetEntries()
         # the version is taken from the first file only, unless already set
         if not hasattr(self, 'version'):
             tree.GetEntry(1)
             if hasattr(tree, 'provenance'):
                 ver = tree.provenance[0].split('_')
                 self.version = ''.join([ver[1], ver[2], 'X'])
             else:
                 self.version = getCMSSWVersion()
         tfile.Close('R')
     logging.info('Analysis is running with version {0}'.format(
         self.version))
     self.flush()
     # other input files
     self.pileupWeights = PileupWeights(self.version)
     self.fakeRates = FakeRates(self.version)
     self.leptonScales = LeptonScales(self.version)
     self.triggerScales = TriggerScales(self.version)
     self.triggerPrescales = TriggerPrescales(self.version)
     self.zptGenWeight = ZptGenWeight(self.version)
     self.zzGenWeight = ZZGenWeight(self.version)
     # tfile: reopen the first input file and keep it open for processing
     fName = resolveFileName(self.fileNames[0])
     self.tfile = ROOT.TFile.Open(fName, 'READ')
     self.oldtree = self.tfile.Get(self.treename)
     # the output file is created before cloning, so the clone is made while
     # the output file is the most recently opened one
     self.outfile = ROOT.TFile(outputFileName, "recreate")
     self.tree = self.oldtree.CloneTree(0)
     # overwrite the zero initialised above with the first bin of the
     # 'summedWeights' object stored in the input file
     summedWeights = self.tfile.Get('summedWeights')
     self.summedWeights = summedWeights.GetBinContent(1)