示例#1
0
    def __init__(self, samples, path, config, optionsList, GroupDict=None):
        #samples: list of the samples, data and mc
        #path: location of the samples used to perform the plot
        #config: list of the configuration files
        #optionsList: Dictionnary containing information on vars, including the cuts
        #! Read arguments and initialise variables

        print "Start Creating HistoMaker"
        print "=========================\n"
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi = 0.
        self.cuts = []
        for options in optionsList:
            self.cuts.append(options['cut'])
        #print "Cuts:",self.cuts
        self.tc = TreeCache(
            self.cuts, samples, path, config
        )  # created cached tree i.e. create new skimmed trees using the list of cuts
        #print self.cuts
        self.tc = TreeCache(self.cuts, samples, path, config)
        self._rebin = False
        self.mybinning = None
        self.GroupDict = GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        print ""
        print "Done Creating HistoMaker"
        print "========================\n"
示例#2
0
    def __init__(self, samples, path, config, optionsList, GroupDict=None):
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi = 0.
        self.cuts = []
        self.weight = []
        self.sys_cuts = []

        for options in optionsList:
            self.cuts.append(options['cut'])
            self.weight.append(options['weight'])
            #self.sys_cuts.append(options['sys_cut'])

        print '  with Cuts  : ', self.cuts[0]
        print '  and Weights: ', self.weight[0]

        self.tc = TreeCache(self.cuts, samples, path, config)

        self._rebin = False
        self.mybinning = None
        self.GroupDict = GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)
示例#3
0
class RegressionTrainer():
    def __init__(self, config):
        vhbb_name_space = config.get('VHbbNameSpace','library')
        ROOT.gSystem.Load(vhbb_name_space)
        
        self.__weight = config.get("TrainRegression","weight")
        self.__vars = config.get("TrainRegression","vars").split()
        self.__target = config.get("TrainRegression","target")
        self.__cut = config.get("TrainRegression","cut")
        self.__title = config.get("TrainRegression","name")
        self.__signals = config.get("TrainRegression","signals")
        self.__regOptions = config.get("TrainRegression","options")
        path = config.get('Directories','PREPout')
        samplesinfo=config.get('Directories','samplesinfo')
        self.__info = ParseInfo(samplesinfo,path)
        self.__samples = self.__info.get_samples(self.__signals)
        self.__tc = TreeCache([self.__cut],self.__samples,path,config)
        self.__trainCut = config.get("TrainRegression","trainCut")
        self.__testCut = config.get("TrainRegression","testCut")
        self.__config = config
        
    
    def train(self):
        signals = []
        signalsTest = []
        for job in self.__samples:
            print '\tREADING IN %s AS SIG'%job.name
            signals.append(self.__tc.get_tree(job,'%s & %s' %(self.__cut,self.__trainCut)))
            signalsTest.append(self.__tc.get_tree(job,'%s & %s'%(self.__cut,self.__testCut)))
            
        # Get the tree from signal
        tree = signals[0]
        
        sWeight = 1.
        fnameOutput='training_Reg_'+self.__title+'.root'
        output = ROOT.TFile.Open('/exports/uftrig01a/dcurry/data/bbar/13TeV/heppy/files/regr_out/'+fnameOutput, "RECREATE")
        print '\n----- Saving output to ', output
        
        factory = ROOT.TMVA.Factory('MVA', output, '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression')
        
        for i, signal in enumerate(signals):
            factory.AddRegressionTree( signal,  sWeight, ROOT.TMVA.Types.kTraining)
            factory.AddRegressionTree( signalsTest[i],  sWeight, ROOT.TMVA.Types.kTesting)
            
            for var in self.__vars:
                factory.AddVariable(var,'D')
                
            factory.AddTarget(self.__target )
            mycut = ROOT.TCut( self.__cut )
            factory.BookMethod(ROOT.TMVA.Types.kBDT,'BDT_REG_%s'%(self.__title),self.__regOptions) 
            factory.TrainAllMethods()
            factory.TestAllMethods()
            factory.EvaluateAllMethods()
            output.Write()
        
        '''
示例#4
0
    def __init__(self,
                 samples,
                 path,
                 config,
                 optionsList,
                 GroupDict=None,
                 filelist=None,
                 mergeplot=False,
                 sample_to_merge=None,
                 mergeCachingPart=-1,
                 plotMergeCached=False,
                 branch_to_keep=None,
                 dccut=None,
                 remove_sys=None):
        #samples: list of the samples, data and mc
        #path: location of the samples used to perform the plot
        #config: list of the configuration files
        #optionsList: Dictionnary containing information on vars, including the cuts
        #mergeCachingPart: number of the output file in mergecaching step
        #plotMergeCached: use partially merged files from mergecaching and merge completely before plotting
        #! Read arguments and initialise variables

        if filelist:
            print 'len(filelist)', len(filelist)
        #print "Start Creating HistoMaker"
        #print "=========================\n"
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi = 0.
        self.cuts = []
        #self.Custom_BDT_bins = None
        self.BDTmin = None
        for options in optionsList:
            self.cuts.append(options['cut'])
        #print "The cut is:",self.cuts
        self.tc = TreeCache(
            self.cuts, samples, path, config, filelist, mergeplot,
            sample_to_merge, mergeCachingPart, plotMergeCached, branch_to_keep,
            False, dccut, remove_sys
        )  # created cached tree i.e. create new skimmed trees using the list of cuts
        if filelist and len(filelist) > 0 or mergeplot or sample_to_merge:
            print('ONLY CACHING PERFORMED, EXITING')
            return
        self._rebin = False
        self.mybinning = None
        self.GroupDict = GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        print ""
        print "Done Creating HistoMaker"
        print "========================\n"
示例#5
0
 def __init__(self, config):
     """Load the VHbb library, read the training settings and cache the trees.

     config: configuration object queried through config.get(section, option).
     """
     ROOT.gSystem.Load(config.get('VHbbNameSpace','library'))

     def setting(option):
         # Shorthand for config.get("TrainRegression", <option>).
         return config.get("TrainRegression", option)

     self.__weight = setting("weight")
     self.__vars = setting("vars").split()
     self.__target = setting("target")
     self.__cut = setting("cut")
     self.__title = setting("name")
     self.__signals = setting("signals")
     self.__regOptions = setting("options")

     # Sample location and sample description.
     path = config.get('Directories','PREPout')
     samplesinfo = config.get('Directories','samplesinfo')
     self.__info = ParseInfo(samplesinfo, path)
     self.__samples = self.__info.get_samples(self.__signals)
     self.__tc = TreeCache([self.__cut], self.__samples, path, config)

     self.__trainCut = setting("trainCut")
     self.__testCut = setting("testCut")
     self.__config = config
示例#6
0
 def __init__(self, samples, path, config, optionsList,GroupDict=None):
     """Keep the plotting inputs, build the tree cache, load the VHbb library.

     Every entry of optionsList must provide 'cut'; the first entry also
     provides 'nBins'.
     """
     self.path, self.config, self.optionsList = path, config, optionsList
     self.nBins = optionsList[0]['nBins']
     self.lumi = 0.
     self.cuts = [entry['cut'] for entry in optionsList]
     self.tc = TreeCache(self.cuts, samples, path, config)
     # Rebinning is off until it is requested elsewhere.
     self.calc_rebin_flag = False
     self._rebin = False
     self.mybinning = None
     self.GroupDict = GroupDict
     ROOT.gSystem.Load(config.get('VHbbNameSpace','library'))
示例#7
0
 def __init__(self, config):
     """Read the regression-training configuration and prepare the trees.

     config: configuration object queried through config.get(section, option);
     it is also kept on the instance.
     """
     section = "TrainRegression"

     library = config.get('VHbbNameSpace','library')
     ROOT.gSystem.Load(library)

     self.__config = config
     self.__weight = config.get(section, "weight")
     variable_names = config.get(section, "vars")
     self.__vars = variable_names.split()
     self.__target = config.get(section, "target")
     self.__cut = config.get(section, "cut")
     self.__title = config.get(section, "name")
     self.__signals = config.get(section, "signals")
     self.__regOptions = config.get(section, "options")

     prep_dir = config.get('Directories','PREPout')
     info_dir = config.get('Directories','samplesinfo')
     self.__info = ParseInfo(info_dir, prep_dir)
     self.__samples = self.__info.get_samples(self.__signals)
     self.__tc = TreeCache([self.__cut], self.__samples, prep_dir, config)

     self.__trainCut = config.get(section, "trainCut")
     self.__testCut = config.get(section, "testCut")
示例#8
0
    def __init__(self, samples, path, config, optionsList,GroupDict=None):
        #samples: list of the samples, data and mc
	#path: location of the samples used to perform the plot
	#config: list of the configuration files
	#optionsList: Dictionnary containing information on vars, including the cuts
	#! Read arguments and initialise variables
	print "The options are ", optionsList
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi=0.
        self.cuts = []
        for options in optionsList:
            self.cuts.append(options['cut'])
        self.tc = TreeCache(self.cuts,samples,path,config)# created cached tree i.e. create new skimmed trees using the list of cuts
        self._rebin = False
        self.mybinning = None
        self.GroupDict=GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace=config.get('VHbbNameSpace','library')
        ROOT.gSystem.Load(VHbbNameSpace)
示例#9
0
文件: HistoMaker.py 项目: GLP90/Xbb
    def __init__(self, samples, path, config, optionsList, GroupDict=None, filelist=None, mergeplot=False, sample_to_merge=None, mergeCachingPart=-1, plotMergeCached=False,  branch_to_keep=None, dccut=None, remove_sys=None):
        #samples: list of the samples, data and mc
        #path: location of the samples used to perform the plot
        #config: list of the configuration files
        #optionsList: Dictionnary containing information on vars, including the cuts
        #mergeCachingPart: number of the output file in mergecaching step
        #plotMergeCached: use partially merged files from mergecaching and merge completely before plotting
        #! Read arguments and initialise variables

        if filelist:
            print 'len(filelist)',len(filelist)
        #print "Start Creating HistoMaker"
        #print "=========================\n"
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi=0.
        self.cuts = []
        #self.Custom_BDT_bins = None
        self.BDTmin= None
        for options in optionsList:
            self.cuts.append(options['cut'])
        #print "The cut is:",self.cuts
        self.tc = TreeCache(self.cuts, samples, path, config, filelist, mergeplot, sample_to_merge, mergeCachingPart, plotMergeCached, branch_to_keep, False, dccut, remove_sys)  # created cached tree i.e. create new skimmed trees using the list of cuts
        if filelist and len(filelist)>0 or mergeplot or sample_to_merge:
            print('ONLY CACHING PERFORMED, EXITING');
            return 
        self._rebin = False
        self.mybinning = None
        self.GroupDict=GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace=config.get('VHbbNameSpace','library')
        ROOT.gSystem.Load(VHbbNameSpace)

        print ""
        print "Done Creating HistoMaker"
        print "========================\n"
示例#10
0
    def __init__(self, samples, path, config, optionsList,GroupDict=None,filelist=None,mergeplot=False):
        #samples: list of the samples, data and mc
        #path: location of the samples used to perform the plot
        #config: list of the configuration files
        #optionsList: Dictionnary containing information on vars, including the cuts
        #! Read arguments and initialise variables

        if filelist:
            print 'len(filelist)',len(filelist)
        #print "Start Creating HistoMaker"
        #print "=========================\n"
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi=0.
        self.cuts = []
        for options in optionsList:
            self.cuts.append(options['cut'])
        #print "Cuts:",self.cuts
        self.tc = TreeCache(self.cuts,samples,path,config,filelist,mergeplot)# created cached tree i.e. create new skimmed trees using the list of cuts
        if len(filelist)>0 or mergeplot:
            print('ONLY CACHING PERFORMED, EXITING');
            sys.exit(1)
        #print self.cuts
        # self.tc = TreeCache(self.cuts,samples,path,config)
        self._rebin = False
        self.mybinning = None
        self.GroupDict=GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace=config.get('VHbbNameSpace','library')
        ROOT.gSystem.Load(VHbbNameSpace)

        print ""
        print "Done Creating HistoMaker"
        print "========================\n"
示例#11
0
文件: HistoMaker.py 项目: GLP90/Xbb
class HistoMaker:
    def __init__(self, samples, path, config, optionsList, GroupDict=None, filelist=None, mergeplot=False, sample_to_merge=None, mergeCachingPart=-1, plotMergeCached=False,  branch_to_keep=None, dccut=None, remove_sys=None):
        #samples: list of the samples, data and mc
        #path: location of the samples used to perform the plot
        #config: list of the configuration files
        #optionsList: Dictionnary containing information on vars, including the cuts
        #mergeCachingPart: number of the output file in mergecaching step
        #plotMergeCached: use partially merged files from mergecaching and merge completely before plotting
        #! Read arguments and initialise variables

        if filelist:
            print 'len(filelist)',len(filelist)
        #print "Start Creating HistoMaker"
        #print "=========================\n"
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi=0.
        self.cuts = []
        #self.Custom_BDT_bins = None
        self.BDTmin= None
        for options in optionsList:
            self.cuts.append(options['cut'])
        #print "The cut is:",self.cuts
        self.tc = TreeCache(self.cuts, samples, path, config, filelist, mergeplot, sample_to_merge, mergeCachingPart, plotMergeCached, branch_to_keep, False, dccut, remove_sys)  # created cached tree i.e. create new skimmed trees using the list of cuts
        if filelist and len(filelist)>0 or mergeplot or sample_to_merge:
            print('ONLY CACHING PERFORMED, EXITING');
            return 
        self._rebin = False
        self.mybinning = None
        self.GroupDict=GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace=config.get('VHbbNameSpace','library')
        ROOT.gSystem.Load(VHbbNameSpace)

        print ""
        print "Done Creating HistoMaker"
        print "========================\n"
    def get_histos_from_tree_dc(self,job,quick=True, subcut_ = None, replacement_cut = None, nomOnly = False):
        '''Fill one histogram per entry of self.optionsList in a single pass over the cached tree of a sample.

        Original note: function that produces the histograms from a HistoMaker,
        optimised for dc (in case of a lot of systematics). This concerns only MC.

        Instead of one draw per option, a TTreeFormula is created for the
        nominal variable/cut/weight and, per systematic, only for the pieces
        that differ ('weight' type: weight; 'shape' type: var and cut). The
        tree is then looped over once and every histogram is filled per event.

        Arguments:
        job             -- sample object; its name, group, type and tree
                           attributes are read here
        quick           -- not used in this method
        subcut_         -- if set, used instead of '1' as the additional cut
                           given to self.tc.get_tree and AND-ed into every cut
        replacement_cut -- string, or list indexed like self.optionsList, used
                           instead of options['cut']; any other non-empty
                           value prints an error and calls sys.exit()
        nomOnly         -- if true, only the first entry of self.optionsList
                           is processed

        Returns a list with one {group: histogram} dictionary per processed
        option; the histogram is passed through self.mybinning.rebin when
        self._rebin is set.
        '''

        print "=============================================================\n"
        print "THE SAMPLE IS ",job.name
        print "=============================================================\n"


        # Fall back to the luminosity from the config when none was set on the instance.
        if self.lumi == 0:
            lumi = self.config.get('Plot_general','lumi')
            #print("You're trying to plot with no lumi, I will use ",lumi)
            self.lumi = lumi

        hTreeList=[]

        #get the conversion rate in case of BDT plots
        # NOTE(review): eval() executes whatever text the config holds; acceptable for trusted configs only.
        TrainFlag = eval(self.config.get('Analysis','TrainFlag'))

        # #Remove EventForTraining in order to run the MVA directly from the PREP step

        # Select the extra event-parity cut that is AND-ed into the cut of BDT-like variables further down.
        if not 'PSI' in self.config.get('Configuration','whereToLaunch'):
#            BDT_add_cut='((evt%2) == 0 || Alt$(isData,0))'
            if 'ZJets_amc' in job.name:
                print 'No training cut for the sample', job.name
                BDT_add_cut='1'
            else:
                BDT_add_cut='((evt%2) == 0 || isData)'
        else:
            print 'Adding training cut'
            UseTrainSample = eval(self.config.get('Analysis','UseTrainSample'))
            if UseTrainSample:
                BDT_add_cut='((evt%2) == 0 || isData)'
            else:
                if 'ZJets_amc' in job.name:
                    print 'No evt%2 cut for sample', job.name
                    BDT_add_cut='1'
                else:
                    BDT_add_cut='!((evt%2) == 0 || isData)'

        # NOTE(review): plot_path is not used anywhere else in this method.
        plot_path = self.config.get('Directories','plotpath')
        addOverFlow=eval(self.config.get('Plot_general','addOverFlow'))

        # get all Histos at once
        addCut = '1' #'(%s)&&(%s)'%(self.tc.minCut, job.subcut) #debug!!
        print 'subcut_ is', subcut_
        if subcut_:
            addCut = subcut_
        print 'addCut is', addCut

        # get the filenames for the root files to be read into the tree, and fill the count histograms
        rootFileNames = self.tc.get_tree(job, addCut)

        #read options to prepare histo and TTreeFormula
        DrawInfoDicList = []
        #keys are var, cut, weight, htree, type
#        hTreeList=[]
        #varList=[]
#        cutList=[]
#        weightList=[]
#        sysTypeList=[]

        # First pass: build, per option, the strings (var, cut, weight) and the empty histogram.
        index_ = 0
        for options in self.optionsList:

            index_ += 1
            DrawInfoDic = {}

            DrawInfoDic['type'] = options['sysType']
            #sysTypeList.append(options['sysType'])

            if self.optionsList.index(options) == 0:
                print 'This is the nominal histo, going to save him separatly'

            # name is overwritten by options['name'] a few lines below
            name=job.name
            if self.GroupDict is None:
                group=job.group
            else:
                group=self.GroupDict[job.name]
            treeVar=options['var']
            DrawInfoDic['var'] = treeVar
            name=options['name']
            if self._rebin or self.calc_rebin_flag:
                nBins = self.nBins
            else:
                nBins = int(options['nBins'])
            xMin=float(options['xMin'])
            xMax=float(options['xMax'])
            weightF=options['weight']
            if 'SBweight' in options:
                SBweight=options['SBweight']
            else: SBweight = None
            print 'SBweight is', SBweight
            #Include weight per sample (specialweight)
            # Both branches currently build the same string; the specialweight variants are commented out.
            if 'PSI' in self.config.get('Configuration','whereToLaunch'):
                weightF="("+weightF+")"
                #weightF="("+weightF+")*(" + job.specialweight +")"
            else:
                weightF="("+weightF+")"
                #weightF="("+weightF+")*(" + job.specialweight +")"

            #weightList.append(weightF)
            DrawInfoDic['weight'] = weightF

            # Event count used for the normalisation in the rescaling loop at the end.
            if 'countHisto' in options.keys() and 'countbin' in options.keys():
                count=getattr(self.tc,options['countHisto'])[options['countbin']]
            else:
                count=getattr(self.tc,"CountWeighted")[0]

            treeCut='%s & %s'%(options['cut'],addCut)
            if replacement_cut:
                if type(replacement_cut) is str:
                    treeCut='%s & %s'%(replacement_cut,addCut)
                elif type(replacement_cut) is list:
                    treeCut='%s & %s'%(replacement_cut[(self.optionsList).index(options)],addCut)
                else:
                    print '@ERROR: replacement_cut is neither list or string. Aborting'
                    sys.exit()

            # index_ keeps the ROOT object names of the histograms distinct within one sample.
            hTree = ROOT.TH1F('%s_%s_%i'%(name,job.name,index_),'%s'%name,nBins,xMin,xMax)

            hTree.Sumw2()
            hTree.SetTitle(job.name)

            DrawInfoDic['htree'] = hTree
            #hTreeList.append(hTree)


            drawoption = ''
            #print 'treeVar: %s'%(treeVar)
            #print 'weightF: %s'%(weightF)
            #print 'BDT_add_cut: %s'%(BDT_add_cut)
            #print 'treeCut: %s'%(treeCut)

#            print("START DRAWING")
            # NOTE(review): 'cut' is only stored for non-DATA jobs, while DrawInfoDicList[0]['cut']
            # is read unconditionally below once the tree has entries.
            if job.type != 'DATA':
              #if CuttedTree and CuttedTree.GetEntries():
                if 'BDT' in treeVar or 'bdt' in treeVar or 'OPT' in treeVar:#added OPT for BDT optimisation
                    drawoption = '(%s)*(%s & %s)'%(weightF,BDT_add_cut,treeCut)
                    #print "I'm appling: ",BDT_add_cut
                    #cutList.append('(%s & %s)'%(BDT_add_cut,treeCut))
                    DrawInfoDic['cut'] = '(%s & %s)'%(BDT_add_cut,treeCut)
                else:
                    drawoption = '(%s)*(%s)'%(weightF,treeCut)
                    #cutList.append('(%s)'%(treeCut))
                    DrawInfoDic['cut'] = '(%s)'%(treeCut)
                #Not Drawing yet
                ###ROOT.gROOT.ProcessLine('.L /mnt/t3nfs01/data01/shome/gaperrin/VHbb/CMSSW_7_4_3/src/Xbb/python/myutils/TreeDraw.C')
                ###TD = ROOT.treedraw()
                ####print 'drawoptions are', drawoption
                ###hTree = TD.TreeDraw(CuttedTree, hTree, '%s>>%s' %(treeVar,name), drawoption)
                ###full=True

            DrawInfoDicList.append(DrawInfoDic)
            if nomOnly: break

        ###
        ##Prepare loop on the TTree
        ###

        # in case of a list of files, read them as a TChain
        if type(rootFileNames) is list:
            CuttedTree = ROOT.TChain(job.tree)
            for rootFileName in rootFileNames:
                status = CuttedTree.Add(rootFileName + '/' + job.tree, 0)
                if status != 1:
                    print ('ERROR: in HistoMaker.py, cannot add file to chain:'+rootFileName)
            input = None
        # otherwise as a TFile for backwards compatibility
        else:
            # NOTE(review): 'input' shadows the builtin and the file is not closed explicitly in this method.
            input = ROOT.TFile.Open(rootFileNames,'read')
            #Not: no subcut is needed since  done in caching
            #if job.subsample:
            #    addCut += '& (%s)' %(job.subcut)
            CuttedTree = input.Get(job.tree)
            CuttedTree.SetCacheSize(0)
        #print 'CuttedTree.GetEntries()',CuttedTree.GetEntries()

        ##Check if branch exists (for debugging purpose)
        #if CuttedTree.GetEntries() > 0:
        #    bl = CuttedTree.GetListOfBranches()
        #    Bfound = False
        #    for b in bl:
        #        print 'branch name is', b.GetName()
        #        if b.GetName() == 'genWeight':
        #            Bfound = True
        #    if not Bfound:
        #        print 'Warning: branch genWeight doesnt exist in sample', job.name

        # Formulas of the nominal (first) option; they are evaluated for every event.
        if CuttedTree.GetEntries() > 0:
            DrawFormulaNom_var    = ROOT.TTreeFormula('DrawFormulaNom_var',    DrawInfoDicList[0]['var'] ,  CuttedTree)
            DrawFormulaNom_cut    = ROOT.TTreeFormula('DrawFormulaNom_cut',    DrawInfoDicList[0]['cut'] ,  CuttedTree)
            DrawFormulaNom_weight = ROOT.TTreeFormula('DrawFormulaNom_weight', DrawInfoDicList[0]['weight'],CuttedTree)
            #print 'nominal var is', DrawInfoDicList[0]['var']
            #print 'nominal cut is', DrawInfoDicList[0]['cut']
            #print 'nominal weight is', DrawInfoDicList[0]['weight']

        ###
        #Define formulas for systematics
        ###
        DrawFormulaDicList = []
        for DrawInfoDic in DrawInfoDicList:
            DrawFormulaDic = {}
            if CuttedTree.GetEntries() > 0:
                index_ = DrawInfoDicList.index(DrawInfoDic)#such that the formula's have different name
                if DrawInfoDic['type'] == 'nominal':
                    pass
                elif DrawInfoDic['type'] == 'weight':
                    #plot of weight sys. Only Up/Down weights are needed here
                    #print'weight formula is', DrawInfoDic['weight']
                    DrawFormulaDic['weight'] = ROOT.TTreeFormula("weightFormula_%i"%index_, DrawInfoDic['weight'],CuttedTree)
                elif DrawInfoDic['type'] == 'shape':
                    #plot of shape sys. Only var and cut are needed here
                    #print'var formula is', DrawInfoDic['var']
                    DrawFormulaDic['var'] = ROOT.TTreeFormula("varFormula_%i"%index_, DrawInfoDic['var'] ,CuttedTree)
                    #print'cut formula is', DrawInfoDic['cut']
                    DrawFormulaDic['cut'] = ROOT.TTreeFormula("cutFormula_%i"%index_, DrawInfoDic['cut'] ,CuttedTree)
                else:
                    print '@ERROR: type should be either nominal, weight or shape. Aborting'
                    sys.exit()
            #copy type and histo
            DrawFormulaDic['type'] =  DrawInfoDic['type']
            DrawFormulaDic['htree'] = DrawInfoDic['htree']

            DrawFormulaDicList.append(DrawFormulaDic)

        if CuttedTree.GetEntries() > 0:
            #! start the loop over variables (descriebed in options)
            # NOTE(review): First_iter and nEntries are assigned but never read in this method.
            First_iter = True

            nEntries = CuttedTree.GetEntries()
            # i counts the events of the iteration below; presumably it matches the entry number given to LoadTree.
            i = 0
            oldTreeNum = -1
            #for entry in range(0,nEntries):
            for event in CuttedTree:
                #if entry > 1000: break
                #print'================'
                #print 'entry is', entry
                #print'================'

                CuttedTree.LoadTree(i)
                treeNum = CuttedTree.GetTreeNumber()
                #update the leaves
                # Only needed when the tree number changes (including the very first event, since oldTreeNum starts at -1).
                if treeNum != oldTreeNum:
                    #nominal
                    DrawFormulaNom_var.UpdateFormulaLeaves()
                    DrawFormulaNom_cut.UpdateFormulaLeaves()
                    DrawFormulaNom_weight.UpdateFormulaLeaves()

                    for DrawFormulaDic in DrawFormulaDicList:
                        #update all the formula for the ploting
                        if DrawFormulaDic['type'] == 'nominal':
                            pass
                        elif DrawFormulaDic['type'] == 'weight':
                            DrawFormulaDic['weight'].UpdateFormulaLeaves()
                        elif DrawFormulaDic['type'] == 'shape':
                            DrawFormulaDic['var'].UpdateFormulaLeaves()
                            DrawFormulaDic['cut'].UpdateFormulaLeaves()
                    oldTreeNum = treeNum

                DrawFormulaNom_var.GetNdata()
                DrawFormulaNom_cut.GetNdata()
                DrawFormulaNom_weight.GetNdata()

                var = DrawFormulaNom_var.EvalInstance()
                cut = DrawFormulaNom_cut.EvalInstance()
                weight = DrawFormulaNom_weight.EvalInstance()

                #print ''
                #print 'nominal outcomes are'
                #print 'var:', var
                #print 'cut:', cut
                #print 'weight:', weight
                #print ''
                #Fill Nominal Histo
                # The evaluated cut multiplies the weight (presumably 0 or 1, so failing events contribute zero weight).
                DrawFormulaDicList[0]['htree'].Fill(var, weight*cut)

                var2    = None
                cut2    = None
                weight2 = None
                #loop over all different sys, fill the histos
                for DrawFormulaDic in DrawFormulaDicList:
                    #update all the formula for the ploting
                    if DrawFormulaDic['type'] == 'nominal':
                       continue
                    elif DrawFormulaDic['type'] == 'weight':
                        # weight systematic: nominal var and cut, own weight
                        var2    = var
                        cut2    = cut
                        DrawFormulaDic['weight'].GetNdata()
                        weight2 = DrawFormulaDic['weight'].EvalInstance()
                    elif DrawFormulaDic['type'] == 'shape':
                        # shape systematic: own var and cut, nominal weight
                        DrawFormulaDic['cut'].GetNdata()
                        DrawFormulaDic['var'].GetNdata()
                        var2    = DrawFormulaDic['var'].EvalInstance()
                        cut2    = DrawFormulaDic['cut'].EvalInstance()
                        weight2 = weight
                    DrawFormulaDic['htree'].Fill(var2, weight2*cut2)

                i += 1
            #####
            #Rescaling the histograms

        # NOTE(review): treeVar, weightF, count and group still hold the values left by the last
        # iteration of the options loop above; they are not looked up per histogram here.
        for DrawFormulaDic in DrawFormulaDicList:
            hTree = DrawFormulaDic['htree']
            if 'BDT' in treeVar or 'bdt' in treeVar or 'OPT' in treeVar:
                if TrainFlag:
                    if 'ZJets_amc' in job.name:
                        print 'No rescale applied for the sample', job.name
                        MC_rescale_factor = 1.
                    else:
                        MC_rescale_factor=2. ##FIXME## only dataset used for training must be rescaled!!
                else:
                    MC_rescale_factor = 1.

                if 'LHE_weights_scale_wgt[0+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(job,self.config,0,self.lumi, count)*MC_rescale_factor
                elif 'LHE_weights_scale_wgt[1+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(job,self.config,1,self.lumi, count)*MC_rescale_factor
                elif 'LHE_weights_scale_wgt[2+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(job,self.config,2,self.lumi, count)*MC_rescale_factor
                elif 'LHE_weights_scale_wgt[3+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(job,self.config,3,self.lumi, count)*MC_rescale_factor
                else:
                    ScaleFactor = self.tc.get_scale(job,self.config,self.lumi, count)*MC_rescale_factor
            else:
                if 'LHE_weights_scale_wgt[0+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(job,self.config,0,self.lumi, count)
                elif 'LHE_weights_scale_wgt[1+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(job,self.config,1,self.lumi, count)
                elif 'LHE_weights_scale_wgt[2+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(job,self.config,2,self.lumi, count)
                elif 'LHE_weights_scale_wgt[3+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(job,self.config,3,self.lumi, count)
                else:
                    ScaleFactor = self.tc.get_scale(job,self.config,self.lumi, count)
            if ScaleFactor != 0:
                hTree.Scale(ScaleFactor)
            integral = hTree.Integral()
            # !! Brute force correction for histograms with negative integral (problems with datacard) !!
            # Scaling by -0.001 flips the sign and shrinks the histogram by a factor 1000.
            if integral<0:
                hTree.Scale(-0.001)
            # Add bin 0 to the first bin and bin N+1 to the last bin, errors in quadrature;
            # bins 0 and N+1 themselves are left unchanged.
            if addOverFlow:
                uFlow = hTree.GetBinContent(0)+hTree.GetBinContent(1)
                oFlow = hTree.GetBinContent(hTree.GetNbinsX()+1)+hTree.GetBinContent(hTree.GetNbinsX())
                uFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(0),2)+ROOT.TMath.Power(hTree.GetBinError(1),2))
                oFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()),2)+ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()+1),2))
                hTree.SetBinContent(1,uFlow)
                hTree.SetBinContent(hTree.GetNbinsX(),oFlow)
                hTree.SetBinError(1,uFlowErr)
                hTree.SetBinError(hTree.GetNbinsX(),oFlowErr)
            hTree.SetDirectory(0)
            # One single-entry dictionary {group: histogram} per option.
            gDict = {}
            if self._rebin:
                gDict[group] = self.mybinning.rebin(hTree)
                del hTree
            else:
                #print 'not rebinning %s'%job.name
                gDict[group] = hTree
#            print("STOP %s"%treeVar)
            hTreeList.append(gDict)
        First_iter = False
#        print "get_histos_from_tree DONE for ",job.name, "var", options['var'], " in ", str(time.time() - start_time)," s."

        # Explicitly destroy the ROOT tree/chain before dropping the Python reference.
        if CuttedTree: CuttedTree.IsA().Destructor(CuttedTree)
        del CuttedTree
        return hTreeList

    def get_histos_from_tree(self,job,quick=True, subcut_ = None, replacement_cut = None, nomOnly = False):
        start_time = time.time()

        print "=============================================================\n"
        print "THE SAMPLE IS ",job.name
        print "=============================================================\n"

         
        #print "Begin to extract the histos from trees (get_histos_from_tree)"
        #print "=============================================================\n"

        if self.lumi == 0: 
            lumi = self.config.get('Plot_general','lumi')
            #print("You're trying to plot with no lumi, I will use ",lumi)
            self.lumi = lumi
         
        hTreeList=[]

        #get the conversion rate in case of BDT plots
        TrainFlag = eval(self.config.get('Analysis','TrainFlag'))

        # #Remove EventForTraining in order to run the MVA directly from the PREP step

        if not 'PSI' in self.config.get('Configuration','whereToLaunch'):
#            BDT_add_cut='((evt%2) == 0 || Alt$(isData,0))'
            if 'ZJets_amc' in job.name:
                print 'No training cut for the sample', job.name
                BDT_add_cut='1'
            else:
                BDT_add_cut='((evt%2) == 0 || isData)'
        else:
            print 'Adding training cut'
            UseTrainSample = eval(self.config.get('Analysis','UseTrainSample'))
            if UseTrainSample:
                BDT_add_cut='((evt%2) == 0 || isData)'
            else:
                if 'ZJets_amc' in job.name:
                    print 'No evt%2 cut for sample', job.name
                    BDT_add_cut='1'
                else:
                    BDT_add_cut='!((evt%2) == 0 || isData)'

        plot_path = self.config.get('Directories','plotpath')
        addOverFlow=eval(self.config.get('Plot_general','addOverFlow'))

        # get all Histos at once
        addCut = '1' #'(%s)&&(%s)'%(self.tc.minCut, job.subcut) #debug!!
        print 'subcut_ is', subcut_
        if subcut_:
            addCut = subcut_
        #print 'addCut is', addCut

        # get the filenames for the root files to be read into the tree, and fill the count histograms
        rootFileNames = self.tc.get_tree(job, addCut)

        # in case of a list of files, read them as a TChain
        if type(rootFileNames) is list:
            CuttedTree = ROOT.TChain(job.tree)
            for rootFileName in rootFileNames:
                status = CuttedTree.Add(rootFileName + '/' + job.tree, 0)
                if status != 1:
                    print ('ERROR: in HistoMaker.py, cannot add file to chain:'+rootFileName)
            input = None
        # otherwise as a TFile for backwards compatibility
        else:
            input = ROOT.TFile.Open(rootFileNames,'read')
            #Not: no subcut is needed since  done in caching
            #if job.subsample:
            #    addCut += '& (%s)' %(job.subcut)
            CuttedTree = input.Get(job.tree)
            CuttedTree.SetCacheSize(0)
        print 'CuttedTree.GetEntries()',CuttedTree.GetEntries()

        print "All .root MERGED for ",job.name," in ", str(time.time() - start_time)," s."

        #! start the loop over variables (descriebed in options) 
        First_iter = True
        for options in self.optionsList:
            #print 'nomOnly is', nomOnly
            if self.optionsList.index(options) == 0:
                print 'This is the nominal histo, going to save him separatly'
            start_time = time.time()
            name=job.name
            if self.GroupDict is None:
                group=job.group
            else:
                group=self.GroupDict[job.name]
            treeVar=options['var']
            name=options['name']
            if self._rebin or self.calc_rebin_flag:
                nBins = self.nBins
            else:
                nBins = int(options['nBins'])
            xMin=float(options['xMin'])
            xMax=float(options['xMax'])
            weightF=options['weight']
            if 'SBweight' in options:
                SBweight=options['SBweight']
            else: SBweight = None
            #print 'SBweight is', SBweight

            #Include weight per sample (specialweight)
            if 'PSI' in self.config.get('Configuration','whereToLaunch'):
                weightF="("+weightF+")"
                #weightF="("+weightF+")*(" + job.specialweight +")"
            else:
                weightF="("+weightF+")"
                #weightF="("+weightF+")*(" + job.specialweight +")"

            if 'countHisto' in options.keys() and 'countbin' in options.keys():
                count=getattr(self.tc,options['countHisto'])[options['countbin']]
            else:
                count=getattr(self.tc,"CountWeighted")[0]

            #if cutOverWrite:
            #    treeCut= str(1)
            #else:
            #    treeCut='%s'%(options['cut'])
            treeCut='%s & %s'%(options['cut'],addCut)
            print "cut1:", options['cut']
            print "cut2:",addCut
            print "treecut:", treeCut
            if replacement_cut:
                if type(replacement_cut) is str:
                    treeCut='%s & %s'%(replacement_cut,addCut)
                elif type(replacement_cut) is list:
                    treeCut='%s & %s'%(replacement_cut[(self.optionsList).index(options)],addCut)
                else:
                    print '@ERROR: replacement_cut is neither list or string. Aborting'
                    sys.exit()
            
            hTree = ROOT.TH1F('%s'%name,'%s'%name,nBins,xMin,xMax)

            #If you use extension only
            hTree.Sumw2()
            hTree.SetTitle(job.name)
            #print('hTree.name() 1 =',hTree.GetName())
            #print('treeVar 1 =',treeVar)
            drawoption = ''
            #print 'treeVar: %s'%(treeVar)
            #print 'weightF: %s'%(weightF)
            #print 'BDT_add_cut: %s'%(BDT_add_cut)
            #print 'treeCut: %s'%(treeCut)

            if job.type != 'DATA':
                if 'BDT' in treeVar or 'bdt' in treeVar or 'OPT' in treeVar:#added OPT for BDT optimisation
                    drawoption = '(%s)*(%s & %s)'%(weightF,BDT_add_cut,treeCut)
                    #print "I'm appling: ",BDT_add_cut
                    #drawoption = 'sign(genWeight)*(%s)*(%s & %s)'%(weightF,treeCut,BDT_add_cut)
                    #print drawoption
                else: 
                    drawoption = '(%s)*(%s)'%(weightF,treeCut)
                #print ('Draw: %s>>%s' %(treeVar,name), drawoption, "goff,e")
                #print 'Are the drawoptions a string ?', isinstance(drawoption, basestring)
                #import pdb
                #if len(drawoption) > 650:
                #    pdb.set_trace()
                #nevents = CuttedTree.Draw('%s>>%s' %(treeVar,name), ROOT.TCut(drawoption), "goff,e")
                #treeVar = '1'
                #ROOT.gROOT.ProcessLine('.L /mnt/t3nfs01/data01/shome/gaperrin/VHbb/CMSSW_7_4_3/src/Xbb/python/myutils/TreeDraw.C')
                #from ROOT import TreeDraw
                #TD = ROOT.treedraw()
                #print 'drawoptions are', drawoption
                #Make sure sample used for sample systematics are used/skiped

                
                sample_sys_dic = options['sample_sys_dic'] if 'sample_sys_dic' in options else {}
                
                if job.name in sample_sys_dic and not sample_sys_dic[job.name]:
                    print 'sample', job.name, ' will not be ploted'
                    print 'job.name is', job.name
                    print 'sample_sys_dic is', sample_sys_dic
                else:
                    print 'sample', job.name, ' will be ploted'
                    #hTree = TD.TreeDraw(CuttedTree, hTree, '%s>>%s' %(treeVar,name), drawoption)
                    #nevents = CuttedTree.Draw('%s>>%s' %(treeVar,name), str(drawoption), "goff,e")
                    nevents = CuttedTree.Draw('%s>>%s' %(treeVar,name), str(drawoption), "goff,e")
                #if First_iter: print 'Number of events are', nevents
                #print 'nevents:',hTree.GetEntries(),' hTree.name() 2 =',hTree.GetName()
                full=True
            elif job.type == 'DATA':

                if not SBweight == None:
                    treeCutData = '('+treeCut+')*('+SBweight+')'
                else:
                    treeCutData = treeCut
                print 'treeCutData is', treeCutData

                if options['blind']:
                    lowLimitBlindingMass    = 90
                    highLimitBlindingMass   = 140
                    lowLimitBlindingBDT     = 0.4
                    lowLimitBlindingDR      = 0.8
                    highLimitBlindingDR     = 1.6

                    if 'mass' in treeVar:
                        print '@ERROR: removed cut on the mass'
                        sys.exit()
                        lowLimitBlindingMass =hTree.GetBinLowEdge(hTree.FindBin(lowLimitBlindingMass))
                        highLimitBlindingMass =hTree.GetBinLowEdge(hTree.FindBin(highLimitBlindingMass))+ hTree.GetBinWidth(hTree.GetBin(highLimitBlindingMass))
                        veto = ("(%s <%s || %s > %s)" %(treeVar,lowLimitBlindingMass,treeVar,highLimitBlindingMass))
                        CuttedTree.Draw('%s>>%s' %(treeVar,name),veto +'&'+' %(cut)s' %options, "goff,e")
                    elif 'BDT' in treeVar or 'bdt' in treeVar or 'nominal' in treeVar in treeVar:
                        lowLimitBlindingBDT = hTree.GetBinLowEdge(hTree.FindBin(lowLimitBlindingBDT))
                        veto = "(%s <%s)" %(treeVar,lowLimitBlindingBDT)
                        print 'I will add the veto', veto
                        CuttedTree.Draw('%s>>%s' %(treeVar,name),veto +'&'+' %(cut)s'%options, "goff,e")
                    elif 'dR' in treeVar and 'H' in treeVar:
                        lowLimit   = hTree.GetBinLowEdge(hTree.FindBin(lowLimitBlindingDR))
                        highLimit  = hTree.GetBinLowEdge(hTree.FindBin(highLimitBlindingDR))
                        veto = ("(%s <%s || %s > %s)" %(treeVar,lowLimitBlindingMass,treeVar,highLimitBlindingMass))
                        CuttedTree.Draw('%s>>%s' %(treeVar,name),veto +'&'+' %(cut)s'%options, "goff,e")
                    else:
                        CuttedTree.Draw('%s>>%s' %(treeVar,name),'%s' %treeCutData, "goff,e")
                else:
                    CuttedTree.Draw('%s>>%s' %(treeVar,name),'%s' %treeCutData, "goff,e")
                full = True
            # if full:
                # hTree = ROOT.gDirectory.Get(name)
                # print('histo1',ROOT.gDirectory.Get(name))
            # else:
                # hTree = ROOT.TH1F('%s'%name,'%s'%name,nBins,xMin,xMax)
                # hTree.Sumw2()
                # print('histo2',ROOT.gDirectory.Get(name))
#            print("END DRAWING")
#            print("START RESCALE")
            # if full: print 'hTree',hTree.GetName()
              
            if job.type != 'DATA':
                if 'BDT' in treeVar or 'bdt' in treeVar or 'OPT' in treeVar:
                    if TrainFlag:
                        if 'ZJets_amc' in job.name:
                            print 'No rescale applied for the sample', job.name
                            MC_rescale_factor = 1.
                        else:
                            MC_rescale_factor=2. ##FIXME## only dataset used for training must be rescaled!!
                    else: 
                        MC_rescale_factor = 1.

                    if 'LHE_weights_scale_wgt[0+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(job,self.config,0,self.lumi, count)*MC_rescale_factor
                    elif 'LHE_weights_scale_wgt[1+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(job,self.config,1,self.lumi, count)*MC_rescale_factor
                    elif 'LHE_weights_scale_wgt[2+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(job,self.config,2,self.lumi, count)*MC_rescale_factor
                    elif 'LHE_weights_scale_wgt[3+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(job,self.config,3,self.lumi, count)*MC_rescale_factor
                    else:
                        ScaleFactor = self.tc.get_scale(job,self.config,self.lumi, count)*MC_rescale_factor
                else: 
                    if 'LHE_weights_scale_wgt[0+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(job,self.config,0,self.lumi, count)
                    elif 'LHE_weights_scale_wgt[1+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(job,self.config,1,self.lumi, count)
                    elif 'LHE_weights_scale_wgt[2+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(job,self.config,2,self.lumi, count)
                    elif 'LHE_weights_scale_wgt[3+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(job,self.config,3,self.lumi, count)
                    else:
                        ScaleFactor = self.tc.get_scale(job,self.config,self.lumi, count)
                if ScaleFactor != 0:
                    hTree.Scale(ScaleFactor)
                integral = hTree.Integral()
                #print '\t-->import %s\t Integral: %s'%(job.name,integral)
                #print("job:",job.name," ScaleFactor=",ScaleFactor)
                #print("END RESCALE")
                #print("START addOverFlow")
                # !! Brute force correction for histograms with negative integral (problems with datacard) !!
                if integral<0:
                    hTree.Scale(-0.001)
                    #print "#"*30
                    #print "#"*30
                    #print "original integral was:",integral
                    #print "now is:", hTree.Integral()
                    #print "#"*30
                    #print "#"*30
            if addOverFlow:
                uFlow = hTree.GetBinContent(0)+hTree.GetBinContent(1)
                oFlow = hTree.GetBinContent(hTree.GetNbinsX()+1)+hTree.GetBinContent(hTree.GetNbinsX())
                uFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(0),2)+ROOT.TMath.Power(hTree.GetBinError(1),2))
                oFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()),2)+ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()+1),2))
                hTree.SetBinContent(1,uFlow)
                hTree.SetBinContent(hTree.GetNbinsX(),oFlow)
                hTree.SetBinError(1,uFlowErr)
                hTree.SetBinError(hTree.GetNbinsX(),oFlowErr)
            hTree.SetDirectory(0)
            gDict = {}
            if self._rebin:
                gDict[group] = self.mybinning.rebin(hTree)
                del hTree
            else: 
                print 'not rebinning %s'%job.name
                gDict[group] = hTree
#            print("STOP %s"%treeVar)
            hTreeList.append(gDict)
            First_iter = False
            #print "get_histos_from_tree DONE for ",job.name, "var", options['var'], " in ", str(time.time() - start_time)," s."
            #if self.optionsList.index(options) == 0:
            #    c = ROOT.TCanvas('c','c')
            #    c.cd()
            #    hTree.Draw()
            #    c.SaveAs('/mnt/t3nfs01/data01/shome/gaperrin/VHbb/CMSSW_7_4_3/src/Xbb/python/TESTDC/'+group+hTree.GetName()+job.name+'.root')
            #    c.SaveAs('/mnt/t3nfs01/data01/shome/gaperrin/VHbb/CMSSW_7_4_3/src/Xbb/python/TESTDC/'+group+hTree.GetName()+job.name+'.C')
            #    break
            if nomOnly: break

        if CuttedTree: CuttedTree.IsA().Destructor(CuttedTree)
        del CuttedTree
        #print "Finished to extract the histos from trees (get_histos_from_tree)"
        #print "================================================================\n"
        #print "get_histos_from_tree DONE for ",job.name," in ", str(time.time() - start_time)," s."
        return hTreeList
       
    @property
    def rebin(self):
        """Return the current value of the rebinning flag (self._rebin)."""
        return self._rebin

    # NOTE(review): in Python 2 property setters are only honoured on new-style
    # classes - confirm that the enclosing class derives from object.
    @rebin.setter
    def rebin(self, value):
        # Switching on requires a binning to be defined (self.mybinning); the bin
        # count is swapped between rebin_nBins and norebin_nBins together with the
        # flag. Assigning the state that is already active changes nothing.
        if value and not self._rebin:
            if self.mybinning is None:
                raise Exception('define rebinning first')
            self.nBins = self.rebin_nBins
            self._rebin = True
        elif self._rebin and not value:
            self.nBins = self.norebin_nBins
            self._rebin = False

    def calc_rebin(self, bg_list, nBins_start=1000, tolerance=0.35, dc_step = False):
        #print "START calc_rebin"
        self.calc_rebin_flag = True
        self.norebin_nBins = copy(self.nBins)
        self.rebin_nBins = nBins_start
        self.nBins = nBins_start
        i=0
        #add all together:
        print '\n\t...calculating rebinning...'
        for job in bg_list:
            #print "job",job
            if dc_step:
                #htree = self.get_histos_from_tree_dc(job)[0].values()[0]
                #self,job,quick=True, subcut_ = None, replacement_cut = None, nomOnly = False
                #htree = self.get_histos_from_tree(job)[0].values()[0]
                #htree = self.get_histos_from_tree_dc(job, True, None, None, True)[0].values()[0]
                htree = self.get_histos_from_tree(job, True, None, None, True)[0].values()[0]
            else:
                #self,job,quick=True, subcut_ = None, replacement_cut = None, nomOnly = False
                #htree = self.get_histos_from_tree(job)[0].values()[0]
                #htree = self.get_histos_from_tree()[0].values()[0]
                htree = self.get_histos_from_tree(job, True, None, None, True)[0].values()[0]
            print "Integral",job,htree.Integral()
            if not i:
                totalBG = copy(htree)
            else:
                totalBG.Add(htree,1)
            del htree
            i+=1
        ErrorR=0
        ErrorL=0
        TotR=0
        TotL=0
        binR=self.rebin_nBins
        binL=1
        rel=1.0
        print "START loop from right"
        #print "totalBG.Draw("","")",totalBG.Integral()
        #---- from right
        while rel > tolerance :
            TotR+=totalBG.GetBinContent(binR)
            ErrorR=sqrt(ErrorR**2+totalBG.GetBinError(binR)**2)
            binR-=1
            if binR < 0: break
            if TotR < 1.: continue
            print 'binR is', binR
            print 'TotR is', TotR
            print 'ErrorR is', ErrorR
            if not TotR <= 0 and not ErrorR == 0:
                rel=ErrorR/TotR
                print 'rel is',  rel
        print 'upper bin is %s'%binR
        print "END loop from right"

        #---- from left

        rel=1.0
        print "START loop from left"
        while rel > tolerance:
            TotL+=totalBG.GetBinContent(binL)
            ErrorL=sqrt(ErrorL**2+totalBG.GetBinError(binL)**2)
            binL+=1
            if binL > nBins_start: break
            if TotL < 1.: continue
            if not TotL <= 0 and not ErrorL == 0:
                rel=ErrorL/TotL
                print rel
        #it's the lower edge
        print "STOP loop from left"
        binL+=1
        print 'lower bin is %s'%binL

        inbetween=binR-binL
        stepsize=int(inbetween)/(int(self.norebin_nBins)-2)
        modulo = int(inbetween)%(int(self.norebin_nBins)-2)

        print 'stepsize %s'% stepsize
        print 'modulo %s'%modulo
        binlist=[binL]
        for i in range(0,int(self.norebin_nBins)-3):
            binlist.append(binlist[-1]+stepsize)
        binlist[-1]+=modulo
        binlist.append(binR)
        binlist.append(self.rebin_nBins+1)
        print 'binning set to %s'%binlist
        #print "START REBINNER"
        #if not self.Custom_BDT_bins:
        #    self.mybinning = Rebinner(int(self.norebin_nBins),array('d',[-1.0]+[totalBG.GetBinLowEdge(i) for i in binlist]),True)
        #else:
        if self.BDTmin:
            if not type(self.BDTmin) is list: self.BDTmin = [self.BDTmin]
            #default from the rebinner
            default_rebin = array('d',[-1.0]+[totalBG.GetBinLowEdge(i) for i in binlist])
            #cutsom lower bins
            custom_rebin = array('d',self.BDTmin)
            #now making event bins between rightmost bin and cutom bin i.e. [self.BDTmin, event bins, last bin]
            print 'The original number of bins is', len(default_rebin) -1
            nBetween_bins = (len(default_rebin) -1) - 1 - (len(self.BDTmin) -1)
            if default_rebin[-2] < self.BDTmin[-1]:
                print '@ERROR: the custom lower BDT range contains lower edge of rightmost bin. Please change the range definiton. Aborting'
                sys.exit()
            custom_step = (default_rebin[-2] - self.BDTmin[-1])/(nBetween_bins*1.0)
            for b in range(1,nBetween_bins):
                custom_rebin.append(self.BDTmin[-1] + b*custom_step)
            custom_rebin.append(default_rebin[-2])
            custom_rebin.append(1)
            print 'custom_rebin is', custom_rebin
            print 'the final number of bins is', len(custom_rebin) -1
            self.mybinning = Rebinner(len(custom_rebin) -1 ,custom_rebin,True,True)
            #else:
            #if type(self.BDTmin) is list:
            #    #default from the rebinner
            #    default_rebin = array('d',[-1.0]+[totalBG.GetBinLowEdge(i) for i in binlist])
            #    #cutsom lower bins
            #    custom_rebin = array('d',self.BDTmin)

            #    #now making event bins between rightmost bin and cutom bin i.e. [self.BDTmin, event bins, last bin]
            #    nBetween_bins = self.norebin_nBins - 1 - len(self.BDTmin)
            #    custom_step = (default_rebin[-1] - self.BDTmin[-1])/(nBetween_bins*1.0)
            #    for b in range(1,nBetween_bins):
            #        custom_rebin.append(self.BDTmin[-1] + b*custom_step)
            #    custom_rebin.append(default_rebin[-1])
            #    custom_rebin.append(1)
            #    #for b in default_rebin:
            #    #    if b <= self.BDTmin[-1]: continue
            #    #    custom_rebin.append(b)
            #else:
            #    default_rebin = array('d',[-1.0]+[totalBG.GetBinLowEdge(i) for i in binlist])
            #    custom_rebin = array('d',[self.BDTmin])
            #    for b in default_rebin:
            #        if b <= self.BDTmin: continue
            #        custom_rebin.append(b)
            #    self.mybinning = Rebinner(len(custom_rebin) -1 ,custom_rebin),True,self.BDTmin)
        else:
            self.mybinning = Rebinner(int(self.norebin_nBins),array('d',[-1.0]+[totalBG.GetBinLowEdge(i) for i in binlist]),True)
            #self.mybinning = Rebinner(int(self.norebin_nBins),array('d',[-0.8]+[totalBG.GetBinLowEdge(i) for i in binlist]),True)
        #self.mybinning = Rebinner(int(self.norebin_nBins),array('d',[0.]+[totalBG.GetBinLowEdge(i) for i in binlist]),True)
        self._rebin = True
        print '\t > rebinning is set <\n'

    @staticmethod
    def orderandadd(histo_dicts,setup):
        '''
        Sum the histograms of histo_dicts per key, for the keys listed in setup.

        setup is defined in the plot conf file; histo_dicts is a list of
        dictionaries mapping a key of setup to a histogram. The first histogram
        found for a key is cloned, every further one is added to that clone.
        Keys of setup without any histogram are absent from the returned dict.
        '''

        from array import array
        # One-element buffer that IntegralAndError fills with the error.
        errorBuffer = array('d',[0])

        merged = {}
        for sample in setup:
            nAdded = 0
            for histo_dict in histo_dicts:
                if sample not in histo_dict:
                    continue
                histo = histo_dict[sample]
                integral = histo.IntegralAndError(0,histo.GetNbinsX(),errorBuffer)
                error = errorBuffer[0]
                entries = histo.GetEntries()
                subsamplename = histo.GetTitle()
                if nAdded:
                    merged[sample].Add(histo)
                else:
                    merged[sample] = histo.Clone()
                printc('magenta','','\t--> added %s to %s Integral: %s Entries: %s Error: %s'%(subsamplename,sample,integral,entries,error))
                nAdded += 1
        return merged
示例#12
0
class RegressionTrainer():
    """Train a TMVA BDT regression and write the settings needed to apply it.

    All settings are read from the "TrainRegression" section of the
    configuration object passed to the constructor.
    """

    def __init__(self, config):
        """Read the training settings from config and set up the tree cache."""
        vhbb_name_space = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(vhbb_name_space)

        self.__weight = config.get("TrainRegression", "weight")
        self.__vars = config.get("TrainRegression", "vars").split()
        self.__target = config.get("TrainRegression", "target")
        self.__cut = config.get("TrainRegression", "cut")
        self.__title = config.get("TrainRegression", "name")
        self.__signals = config.get("TrainRegression", "signals")
        self.__regOptions = config.get("TrainRegression", "options")
        path = config.get('Directories', 'PREPout')
        samplesinfo = config.get('Directories', 'samplesinfo')
        self.__info = ParseInfo(samplesinfo, path)
        self.__samples = self.__info.get_samples(self.__signals)
        self.__tc = TreeCache([self.__cut], self.__samples, path, config)
        self.__trainCut = config.get("TrainRegression", "trainCut")
        self.__testCut = config.get("TrainRegression", "testCut")
        self.__config = config

    def train(self):
        """Run the regression training and dump the resulting configuration.

        Side effects: writes 'training_Reg_<name>.root', removes every section
        except 'Regression' from the configuration object given to the
        constructor, writes that remainder to '8TeVconfig/appReg' and prints it.
        """
        signals = []
        signalsTest = []
        for job in self.__samples:
            print('\tREADING IN %s AS SIG' % job.name)
            signals.append(
                self.__tc.get_tree(job,
                                   '%s & %s' % (self.__cut, self.__trainCut)))
            signalsTest.append(
                self.__tc.get_tree(job,
                                   '%s & %s' % (self.__cut, self.__testCut)))

        sWeight = 1.
        fnameOutput = 'training_Reg_%s.root' % (self.__title)
        output = ROOT.TFile.Open(fnameOutput, "RECREATE")

        factory = ROOT.TMVA.Factory(
            'MVA', output,
            '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression'
        )
        # Each sample contributes one training tree and one testing tree.
        for trainTree, testTree in zip(signals, signalsTest):
            factory.AddRegressionTree(trainTree, sWeight,
                                      ROOT.TMVA.Types.kTraining)
            factory.AddRegressionTree(testTree, sWeight,
                                      ROOT.TMVA.Types.kTesting)

        # For the application step every 'hJet_<name>' token gets '[0]' appended.
        self.__apply = []
        p = re.compile(r'hJet_\w+')
        for var in self.__vars:
            factory.AddVariable(var, 'D')  # add the variables
            self.__apply.append(p.sub(r'\g<0>[0]', var))

        factory.AddTarget(self.__target)
        factory.BookMethod(ROOT.TMVA.Types.kBDT, 'BDT_REG_%s' % (self.__title),
                           self.__regOptions)  # book an MVA method
        factory.TrainAllMethods()
        factory.TestAllMethods()
        factory.EvaluateAllMethods()
        output.Write()
        # Release the output file once everything has been written.
        output.Close()

        regDict = dict(zip(self.__vars, self.__apply))
        # Make sure the target section exists before options are stored in it.
        if not self.__config.has_section('Regression'):
            self.__config.add_section('Regression')
        self.__config.set('Regression', 'regWeight',
                          '../data/MVA_BDT_REG_%s.weights.xml' % self.__title)
        self.__config.set('Regression', 'regDict', '%s' % regDict)
        self.__config.set('Regression', 'regVars', '%s' % self.__vars)
        # NOTE: this strips the shared config object down to the 'Regression' section.
        for section in self.__config.sections():
            if section != 'Regression':
                self.__config.remove_section(section)
        with open('8TeVconfig/appReg', 'w') as configfile:
            self.__config.write(configfile)
        with open('8TeVconfig/appReg', 'r') as configfile:
            for line in configfile:
                print(line.strip())
示例#13
0
class RegressionTrainer():
    """Train a TMVA BDT regression and write the settings needed to apply it.

    All settings are read from the "TrainRegression" section of the
    configuration object passed to the constructor.
    """

    def __init__(self, config):
        """Read the training settings from config and set up the tree cache."""
        vhbb_name_space = config.get('VHbbNameSpace','library')
        ROOT.gSystem.Load(vhbb_name_space)

        self.__weight = config.get("TrainRegression","weight")
        self.__vars = config.get("TrainRegression","vars").split()
        self.__target = config.get("TrainRegression","target")
        self.__cut = config.get("TrainRegression","cut")
        self.__title = config.get("TrainRegression","name")
        self.__signals = config.get("TrainRegression","signals")
        self.__regOptions = config.get("TrainRegression","options")
        path = config.get('Directories','PREPout')
        samplesinfo=config.get('Directories','samplesinfo')
        self.__info = ParseInfo(samplesinfo,path)
        self.__samples = self.__info.get_samples(self.__signals)
        self.__tc = TreeCache([self.__cut],self.__samples,path,config)
        self.__trainCut = config.get("TrainRegression","trainCut")
        self.__testCut = config.get("TrainRegression","testCut")
        self.__config = config

    def train(self):
        """Run the regression training and dump the resulting configuration.

        Side effects: writes 'training_Reg_<name>.root', removes every section
        except 'Regression' from the configuration object given to the
        constructor, writes that remainder to '8TeVconfig/appReg' and prints it.
        """
        signals = []
        signalsTest = []
        for job in self.__samples:
            print('\tREADING IN %s AS SIG'%job.name)
            signals.append(self.__tc.get_tree(job,'%s & %s' %(self.__cut,self.__trainCut)))
            signalsTest.append(self.__tc.get_tree(job,'%s & %s'%(self.__cut,self.__testCut)))

        sWeight = 1.
        fnameOutput='training_Reg_%s.root' %(self.__title)
        output = ROOT.TFile.Open(fnameOutput, "RECREATE")

        factory = ROOT.TMVA.Factory('MVA', output, '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression')
        # Each sample contributes one training tree and one testing tree.
        for trainTree, testTree in zip(signals, signalsTest):
            factory.AddRegressionTree( trainTree,  sWeight, ROOT.TMVA.Types.kTraining)
            factory.AddRegressionTree( testTree,  sWeight, ROOT.TMVA.Types.kTesting)

        # For the application step every 'hJet_<name>' token gets '[0]' appended.
        self.__apply = []
        p = re.compile(r'hJet_\w+')
        for var in self.__vars:
            factory.AddVariable(var,'D') # add the variables
            self.__apply.append(p.sub(r'\g<0>[0]', var))
            print(self.__apply)

        factory.AddTarget( self.__target )
        factory.BookMethod(ROOT.TMVA.Types.kBDT,'BDT_REG_%s'%(self.__title),self.__regOptions) # book an MVA method
        factory.TrainAllMethods()
        factory.TestAllMethods()
        factory.EvaluateAllMethods()
        output.Write()
        # Release the output file once everything has been written.
        output.Close()

        regDict = dict(zip(self.__vars, self.__apply))
        # Make sure the target section exists before options are stored in it.
        if not self.__config.has_section('Regression'):
            self.__config.add_section('Regression')
        self.__config.set('Regression', 'regWeight', '../data/MVA_BDT_REG_%s.weights.xml' %self.__title)
        self.__config.set('Regression', 'regDict', '%s' %regDict)
        self.__config.set('Regression', 'regVars', '%s' %self.__vars)
        # NOTE: this strips the shared config object down to the 'Regression' section.
        for section in self.__config.sections():
            if section != 'Regression':
                self.__config.remove_section(section)
        with open('8TeVconfig/appReg', 'w') as configfile:
            self.__config.write(configfile)
        with open('8TeVconfig/appReg', 'r') as configfile:
            for line in configfile:
                print(line.strip())
示例#14
0
class HistoMaker(object):
    """Produce scaled (and optionally rebinned) histograms from cached trees.

    The class must be new-style (derive from ``object``) so that the
    ``rebin`` property setter is honoured on Python 2; on an old-style class
    an assignment to ``rebin`` would silently bypass the property.
    """

    def __init__(self, samples, path, config, optionsList, GroupDict=None):
        """Store the plotting options and build the tree cache.

        samples:     passed through to TreeCache.
        path:        passed through to TreeCache and kept as ``self.path``.
        config:      config object; ``config.get(section, option)`` is used.
        optionsList: list of dicts; each needs at least a 'cut' key and the
                     first one an 'nBins' key (more keys are read later by
                     get_histos_from_tree).
        GroupDict:   optional mapping job.name -> group name; when None the
                     job's own ``group`` attribute is used.
        """
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi = 0.
        self.cuts = [options['cut'] for options in optionsList]
        self.tc = TreeCache(self.cuts, samples, path, config)
        self._rebin = False
        self.mybinning = None
        self.GroupDict = GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

    def get_histos_from_tree(self, job, cutOverWrite=None):
        """Draw one histogram per entry of ``self.optionsList`` for ``job``.

        job:          sample object; ``name``, ``group`` and ``type`` are read
                      here, and it is handed to the tree cache.
        cutOverWrite: optional cut string used instead of ``options['cut']``.

        Returns a list with one ``{group: histogram}`` dict per options entry.
        Raises Exception if ``self.lumi`` has not been set (is still 0).
        """
        if self.lumi == 0:
            raise Exception("You're trying to plot with no lumi")

        hTreeList = []

        # NOTE(review): eval() is applied to raw config strings here; this is
        # only safe as long as the configuration files are trusted.
        TrainFlag = eval(self.config.get('Analysis', 'TrainFlag'))
        BDT_add_cut = 'EventForTraining == 0'
        addOverFlow = eval(self.config.get('Plot_general', 'addOverFlow'))

        # One cached tree serves all the histograms of this job.
        CuttedTree = self.tc.get_tree(job, '1')
        for options in self.optionsList:
            if self.GroupDict is None:
                group = job.group
            else:
                group = self.GroupDict[job.name]
            treeVar = options['var']
            name = options['name']
            # While a rebinning is being computed or is active, the fine
            # binning stored on the instance overrides the per-plot binning.
            if self._rebin or self.calc_rebin_flag:
                nBins = self.nBins
            else:
                nBins = int(options['nBins'])
            xMin = float(options['xMin'])
            xMax = float(options['xMax'])
            weightF = options['weight']
            if cutOverWrite:
                treeCut = cutOverWrite
            else:
                treeCut = '%s' % (options['cut'])

            drawExpr = '%s>>%s(%s,%s,%s)' % (treeVar, name, nBins, xMin, xMax)
            isBDTVar = 'RTight' in treeVar or 'RMed' in treeVar

            if job.type != 'DATA':
                if CuttedTree.GetEntries():
                    if isBDTVar:
                        drawoption = '(%s)*(%s & %s)' % (weightF, treeCut,
                                                         BDT_add_cut)
                    else:
                        drawoption = '(%s)*(%s)' % (weightF, treeCut)
                    CuttedTree.Draw(drawExpr, drawoption, "goff,e")
                    full = True
                else:
                    full = False
            else:
                if options['blind']:
                    # NOTE(review): the blinded selections use options['cut']
                    # directly, so cutOverWrite is ignored here - confirm
                    # that this is intended.
                    if treeVar == 'H.mass':
                        blindCut = (
                            ' (%(var)s <90. || %(var)s > 150.) & %(cut)s' %
                            options)
                    else:
                        blindCut = '%(var)s < 0. & %(cut)s' % options
                    CuttedTree.Draw(drawExpr, blindCut, "goff,e")
                else:
                    CuttedTree.Draw(drawExpr, '%s' % treeCut, "goff,e")
                full = True

            if full:
                hTree = ROOT.gDirectory.Get(name)
            else:
                # Nothing was drawn: hand back an empty histogram instead.
                hTree = ROOT.TH1F('%s' % name, '%s' % name, nBins, xMin, xMax)
                hTree.Sumw2()

            if job.type != 'DATA':
                if isBDTVar:
                    if TrainFlag:
                        MC_rescale_factor = 2.
                    else:
                        MC_rescale_factor = 1.
                    ScaleFactor = self.tc.get_scale(
                        job, self.config, self.lumi) * MC_rescale_factor
                else:
                    ScaleFactor = self.tc.get_scale(job, self.config,
                                                    self.lumi)
                if ScaleFactor != 0:
                    hTree.Scale(ScaleFactor)

            if addOverFlow:
                # Fold bin 0 into bin 1 and bin N+1 into bin N, adding the
                # errors in quadrature.
                lastBin = hTree.GetNbinsX()
                uFlow = hTree.GetBinContent(0) + hTree.GetBinContent(1)
                oFlow = (hTree.GetBinContent(lastBin + 1) +
                         hTree.GetBinContent(lastBin))
                uFlowErr = ROOT.TMath.Sqrt(
                    ROOT.TMath.Power(hTree.GetBinError(0), 2) +
                    ROOT.TMath.Power(hTree.GetBinError(1), 2))
                oFlowErr = ROOT.TMath.Sqrt(
                    ROOT.TMath.Power(hTree.GetBinError(lastBin), 2) +
                    ROOT.TMath.Power(hTree.GetBinError(lastBin + 1), 2))
                hTree.SetBinContent(1, uFlow)
                hTree.SetBinContent(lastBin, oFlow)
                hTree.SetBinError(1, uFlowErr)
                hTree.SetBinError(lastBin, oFlowErr)

            hTree.SetDirectory(0)
            gDict = {}
            if self._rebin:
                gDict[group] = self.mybinning.rebin(hTree)
                del hTree
            else:
                gDict[group] = hTree
            hTreeList.append(gDict)

        CuttedTree.IsA().Destructor(CuttedTree)
        del CuttedTree
        return hTreeList

    @property
    def rebin(self):
        """True while histograms are being rebinned with ``self.mybinning``."""
        return self._rebin

    @rebin.setter
    def rebin(self, value):
        """Switch rebinning on or off and swap ``self.nBins`` accordingly.

        Raises Exception when rebinning is switched on before a binning has
        been defined (``self.mybinning`` is None). Setting the current state
        again is a no-op.
        """
        if value:
            if self._rebin:
                return
            if self.mybinning is None:
                raise Exception('define rebinning first')
            self.nBins = self.rebin_nBins
            self._rebin = True
        elif self._rebin:
            self.nBins = self.norebin_nBins
            self._rebin = False

    def calc_rebin(self, bg_list, nBins_start=1000, tolerance=0.25):
        """Compute a rebinning from the summed histograms of ``bg_list``.

        The first histogram of every job in ``bg_list`` is drawn with
        ``nBins_start`` bins and summed. Bins are then accumulated from the
        right and from the left until the relative error of the accumulated
        content is no larger than ``tolerance``; the range in between is
        divided into ``norebin_nBins - 2`` steps. The resulting bin edges are
        handed to a Rebinner stored in ``self.mybinning`` and rebinning is
        switched on.

        Raises ValueError when ``bg_list`` yields no job.
        """
        self.calc_rebin_flag = True
        self.norebin_nBins = copy(self.nBins)
        self.rebin_nBins = nBins_start
        self.nBins = nBins_start
        #add all together:
        print('\n\t...calculating rebinning...')
        totalBG = None
        for job in bg_list:
            htree = list(self.get_histos_from_tree(job)[0].values())[0]
            if totalBG is None:
                totalBG = copy(htree)
            else:
                totalBG.Add(htree, 1)
            del htree
        if totalBG is None:
            raise ValueError('calc_rebin needs at least one background sample')

        ErrorR = 0
        ErrorL = 0
        TotR = 0
        TotL = 0
        binR = self.rebin_nBins
        binL = 1
        rel = 1.0
        # NOTE(review): neither scan below has a bound on the bin index, so
        # they rely on the summed histogram reaching the tolerance.
        #---- from right
        while rel > tolerance:
            TotR += totalBG.GetBinContent(binR)
            ErrorR = sqrt(ErrorR**2 + totalBG.GetBinError(binR)**2)
            binR -= 1
            if not TotR == 0 and not ErrorR == 0:
                rel = ErrorR / TotR

        #---- from left
        rel = 1.0
        while rel > tolerance:
            TotL += totalBG.GetBinContent(binL)
            ErrorL = sqrt(ErrorL**2 + totalBG.GetBinError(binL)**2)
            binL += 1
            if not TotL == 0 and not ErrorL == 0:
                rel = ErrorL / TotL
        #it's the lower edge
        binL += 1

        inbetween = binR - binL
        # Floor division keeps the integer result on Python 2 and Python 3.
        stepsize = int(inbetween) // (int(self.norebin_nBins) - 2)
        modulo = int(inbetween) % (int(self.norebin_nBins) - 2)

        binlist = [binL]
        for i in range(0, int(self.norebin_nBins) - 3):
            binlist.append(binlist[-1] + stepsize)
        # The remainder of the division is absorbed by the last regular step.
        binlist[-1] += modulo
        binlist.append(binR)
        binlist.append(self.rebin_nBins + 1)
        self.mybinning = Rebinner(
            int(self.norebin_nBins),
            array('d', [-1.0] + [totalBG.GetBinLowEdge(i) for i in binlist]),
            True)
        self._rebin = True
        print('\t > rebinning is set <\n')

    @staticmethod
    def orderandadd(histo_dicts, setup):
        """Merge per-job histogram dicts into one dict keyed by sample name.

        histo_dicts: iterable of ``{sample: histogram}`` dicts.
        setup:       sample names to look up, in the desired order.

        For every name in ``setup`` the first matching histogram is cloned
        and all further matches are added to that clone. Names without any
        match are left out of the result.
        """
        ordered_histo_dict = {}
        for sample in setup:
            nSample = 0
            for histo_dict in histo_dicts:
                if sample not in histo_dict:
                    continue
                if nSample == 0:
                    ordered_histo_dict[sample] = histo_dict[sample].Clone()
                else:
                    printc('magenta', '',
                           '\t--> added %s to %s' % (sample, sample))
                    ordered_histo_dict[sample].Add(histo_dict[sample])
                nSample += 1
        return ordered_histo_dict
示例#15
0
class HistoMaker:
    def __init__(self,
                 samples,
                 path,
                 config,
                 optionsList,
                 GroupDict=None,
                 filelist=None,
                 mergeplot=False,
                 sample_to_merge=None,
                 mergeCachingPart=-1,
                 plotMergeCached=False,
                 branch_to_keep=None,
                 dccut=None,
                 remove_sys=None):
        #samples: list of the samples, data and mc
        #path: location of the samples used to perform the plot
        #config: list of the configuration files
        #optionsList: Dictionnary containing information on vars, including the cuts
        #mergeCachingPart: number of the output file in mergecaching step
        #plotMergeCached: use partially merged files from mergecaching and merge completely before plotting
        #! Read arguments and initialise variables

        if filelist:
            print 'len(filelist)', len(filelist)
        #print "Start Creating HistoMaker"
        #print "=========================\n"
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi = 0.
        self.cuts = []
        #self.Custom_BDT_bins = None
        self.BDTmin = None
        for options in optionsList:
            self.cuts.append(options['cut'])
        #print "The cut is:",self.cuts
        self.tc = TreeCache(
            self.cuts, samples, path, config, filelist, mergeplot,
            sample_to_merge, mergeCachingPart, plotMergeCached, branch_to_keep,
            False, dccut, remove_sys
        )  # created cached tree i.e. create new skimmed trees using the list of cuts
        if filelist and len(filelist) > 0 or mergeplot or sample_to_merge:
            print('ONLY CACHING PERFORMED, EXITING')
            return
        self._rebin = False
        self.mybinning = None
        self.GroupDict = GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

        print ""
        print "Done Creating HistoMaker"
        print "========================\n"

    def get_histos_from_tree_dc(self,
                                job,
                                quick=True,
                                subcut_=None,
                                replacement_cut=None,
                                nomOnly=False):
        '''Fill all histograms of a sample in a single pass over its tree.

        Optimised for datacards (many systematics); this concerns only MC.
        Instead of one Draw call per histogram, one TTreeFormula is built per
        needed expression and the tree is looped over once.

        job:             sample object; name, group, type and tree are read.
        quick:           not referenced in this method.
        subcut_:         optional cut used instead of the default '1' as the
                         additional cut.
        replacement_cut: optional str (used for every histogram) or list
                         (indexed like self.optionsList) replacing
                         options['cut'].
        nomOnly:         when true, only the first entry of self.optionsList
                         is processed.

        Returns a list with one {group: histogram} dict per processed entry.

        NOTE(review): the 'cut' entry is only filled for job.type != 'DATA',
        so a DATA job with a non-empty tree fails on the nominal cut formula.
        NOTE(review): the scaling loop at the end uses treeVar, weightF,
        count and group as left over from the last options entry.
        '''

        print "=============================================================\n"
        print "THE SAMPLE IS ", job.name
        print "=============================================================\n"

        # Fall back to the configured luminosity; it is stored exactly as
        # returned by config.get.
        if self.lumi == 0:
            lumi = self.config.get('Plot_general', 'lumi')
            #print("You're trying to plot with no lumi, I will use ",lumi)
            self.lumi = lumi

        hTreeList = []

        #get the conversion rate in case of BDT plots
        # NOTE(review): eval() on a raw config string; safe only for trusted
        # configuration files.
        TrainFlag = eval(self.config.get('Analysis', 'TrainFlag'))

        # #Remove EventForTraining in order to run the MVA directly from the PREP step

        # Pick the extra cut that is applied to BDT-like variables.
        if not 'PSI' in self.config.get('Configuration', 'whereToLaunch'):
            #            BDT_add_cut='((evt%2) == 0 || Alt$(isData,0))'
            if 'ZJets_amc' in job.name:
                print 'No training cut for the sample', job.name
                BDT_add_cut = '1'
            else:
                BDT_add_cut = '((evt%2) == 0 || isData)'
        else:
            print 'Adding training cut'
            UseTrainSample = eval(self.config.get('Analysis',
                                                  'UseTrainSample'))
            if UseTrainSample:
                BDT_add_cut = '((evt%2) == 0 || isData)'
            else:
                if 'ZJets_amc' in job.name:
                    print 'No evt%2 cut for sample', job.name
                    BDT_add_cut = '1'
                else:
                    BDT_add_cut = '!((evt%2) == 0 || isData)'

        # plot_path is not used further down in this method.
        plot_path = self.config.get('Directories', 'plotpath')
        addOverFlow = eval(self.config.get('Plot_general', 'addOverFlow'))

        # get all Histos at once
        addCut = '1'  #'(%s)&&(%s)'%(self.tc.minCut, job.subcut) #debug!!
        print 'subcut_ is', subcut_
        if subcut_:
            addCut = subcut_
        print 'addCut is', addCut

        # get the filenames for the root files to be read into the tree, and fill the count histograms
        rootFileNames = self.tc.get_tree(job, addCut)

        #read options to prepare histo and TTreeFormula
        DrawInfoDicList = []
        #keys are var, cut, weight, htree, type
        #        hTreeList=[]
        #varList=[]
        #        cutList=[]
        #        weightList=[]
        #        sysTypeList=[]

        # Phase 1: one info dict (expressions + empty histogram) per entry.
        index_ = 0
        for options in self.optionsList:

            index_ += 1
            DrawInfoDic = {}

            DrawInfoDic['type'] = options['sysType']
            #sysTypeList.append(options['sysType'])

            if self.optionsList.index(options) == 0:
                print 'This is the nominal histo, going to save him separatly'

            # name is overwritten by options['name'] a few lines below.
            name = job.name
            if self.GroupDict is None:
                group = job.group
            else:
                group = self.GroupDict[job.name]
            treeVar = options['var']
            DrawInfoDic['var'] = treeVar
            name = options['name']
            if self._rebin or self.calc_rebin_flag:
                nBins = self.nBins
            else:
                nBins = int(options['nBins'])
            xMin = float(options['xMin'])
            xMax = float(options['xMax'])
            weightF = options['weight']
            # SBweight is only printed, it does not enter the weight.
            if 'SBweight' in options:
                SBweight = options['SBweight']
            else:
                SBweight = None
            print 'SBweight is', SBweight
            #Include weight per sample (specialweight)
            # Both branches currently build the same expression.
            if 'PSI' in self.config.get('Configuration', 'whereToLaunch'):
                weightF = "(" + weightF + ")"
                #weightF="("+weightF+")*(" + job.specialweight +")"
            else:
                weightF = "(" + weightF + ")"
                #weightF="("+weightF+")*(" + job.specialweight +")"

            #weightList.append(weightF)
            DrawInfoDic['weight'] = weightF

            # Normalisation count: a specific bin of a count histogram held
            # by the tree cache when requested, otherwise CountWeighted[0].
            if 'countHisto' in options.keys() and 'countbin' in options.keys():
                count = getattr(self.tc,
                                options['countHisto'])[options['countbin']]
            else:
                count = getattr(self.tc, "CountWeighted")[0]

            treeCut = '%s & %s' % (options['cut'], addCut)
            if replacement_cut:
                if type(replacement_cut) is str:
                    treeCut = '%s & %s' % (replacement_cut, addCut)
                elif type(replacement_cut) is list:
                    treeCut = '%s & %s' % (replacement_cut[
                        (self.optionsList).index(options)], addCut)
                else:
                    print '@ERROR: replacement_cut is neither list or string. Aborting'
                    sys.exit()

            # index_ keeps the ROOT object names unique within this call.
            hTree = ROOT.TH1F('%s_%s_%i' % (name, job.name, index_),
                              '%s' % name, nBins, xMin, xMax)

            hTree.Sumw2()
            hTree.SetTitle(job.name)

            DrawInfoDic['htree'] = hTree
            #hTreeList.append(hTree)

            # drawoption is assembled below but not read afterwards; only
            # DrawInfoDic['cut'] is consumed by the formulas.
            drawoption = ''
            #print 'treeVar: %s'%(treeVar)
            #print 'weightF: %s'%(weightF)
            #print 'BDT_add_cut: %s'%(BDT_add_cut)
            #print 'treeCut: %s'%(treeCut)

            #            print("START DRAWING")
            if job.type != 'DATA':
                #if CuttedTree and CuttedTree.GetEntries():
                if 'BDT' in treeVar or 'bdt' in treeVar or 'OPT' in treeVar:  #added OPT for BDT optimisation
                    drawoption = '(%s)*(%s & %s)' % (weightF, BDT_add_cut,
                                                     treeCut)
                    #print "I'm appling: ",BDT_add_cut
                    #cutList.append('(%s & %s)'%(BDT_add_cut,treeCut))
                    DrawInfoDic['cut'] = '(%s & %s)' % (BDT_add_cut, treeCut)
                else:
                    drawoption = '(%s)*(%s)' % (weightF, treeCut)
                    #cutList.append('(%s)'%(treeCut))
                    DrawInfoDic['cut'] = '(%s)' % (treeCut)
                #Not Drawing yet
                ###ROOT.gROOT.ProcessLine('.L /mnt/t3nfs01/data01/shome/gaperrin/VHbb/CMSSW_7_4_3/src/Xbb/python/myutils/TreeDraw.C')
                ###TD = ROOT.treedraw()
                ####print 'drawoptions are', drawoption
                ###hTree = TD.TreeDraw(CuttedTree, hTree, '%s>>%s' %(treeVar,name), drawoption)
                ###full=True

            DrawInfoDicList.append(DrawInfoDic)
            if nomOnly: break

        ###
        ##Prepare loop on the TTree
        ###

        # Phase 2: open the cached tree.
        # in case of a list of files, read them as a TChain
        if type(rootFileNames) is list:
            CuttedTree = ROOT.TChain(job.tree)
            for rootFileName in rootFileNames:
                status = CuttedTree.Add(rootFileName + '/' + job.tree, 0)
                if status != 1:
                    print(
                        'ERROR: in HistoMaker.py, cannot add file to chain:' +
                        rootFileName)
            input = None
        # otherwise as a TFile for backwards compatibility
        else:
            # NOTE(review): this TFile is not closed explicitly in this
            # method, and the local name shadows the input builtin.
            input = ROOT.TFile.Open(rootFileNames, 'read')
            #Not: no subcut is needed since  done in caching
            #if job.subsample:
            #    addCut += '& (%s)' %(job.subcut)
            CuttedTree = input.Get(job.tree)
            CuttedTree.SetCacheSize(0)
        #print 'CuttedTree.GetEntries()',CuttedTree.GetEntries()

        ##Check if branch exists (for debugging purpose)
        #if CuttedTree.GetEntries() > 0:
        #    bl = CuttedTree.GetListOfBranches()
        #    Bfound = False
        #    for b in bl:
        #        print 'branch name is', b.GetName()
        #        if b.GetName() == 'genWeight':
        #            Bfound = True
        #    if not Bfound:
        #        print 'Warning: branch genWeight doesnt exist in sample', job.name

        # Phase 3: formulas for the nominal histogram, taken from the first
        # entry of the info list.
        if CuttedTree.GetEntries() > 0:
            DrawFormulaNom_var = ROOT.TTreeFormula('DrawFormulaNom_var',
                                                   DrawInfoDicList[0]['var'],
                                                   CuttedTree)
            DrawFormulaNom_cut = ROOT.TTreeFormula('DrawFormulaNom_cut',
                                                   DrawInfoDicList[0]['cut'],
                                                   CuttedTree)
            DrawFormulaNom_weight = ROOT.TTreeFormula(
                'DrawFormulaNom_weight', DrawInfoDicList[0]['weight'],
                CuttedTree)
            #print 'nominal var is', DrawInfoDicList[0]['var']
            #print 'nominal cut is', DrawInfoDicList[0]['cut']
            #print 'nominal weight is', DrawInfoDicList[0]['weight']

        ###
        #Define formulas for systematics
        ###
        # 'weight' systematics only need their own weight formula, 'shape'
        # systematics their own var and cut formulas; the rest is shared with
        # the nominal formulas.
        DrawFormulaDicList = []
        for DrawInfoDic in DrawInfoDicList:
            DrawFormulaDic = {}
            if CuttedTree.GetEntries() > 0:
                index_ = DrawInfoDicList.index(
                    DrawInfoDic)  #such that the formula's have different name
                if DrawInfoDic['type'] == 'nominal':
                    pass
                elif DrawInfoDic['type'] == 'weight':
                    #plot of weight sys. Only Up/Down weights are needed here
                    #print'weight formula is', DrawInfoDic['weight']
                    DrawFormulaDic['weight'] = ROOT.TTreeFormula(
                        "weightFormula_%i" % index_, DrawInfoDic['weight'],
                        CuttedTree)
                elif DrawInfoDic['type'] == 'shape':
                    #plot of shape sys. Only var and cut are needed here
                    #print'var formula is', DrawInfoDic['var']
                    DrawFormulaDic['var'] = ROOT.TTreeFormula(
                        "varFormula_%i" % index_, DrawInfoDic['var'],
                        CuttedTree)
                    #print'cut formula is', DrawInfoDic['cut']
                    DrawFormulaDic['cut'] = ROOT.TTreeFormula(
                        "cutFormula_%i" % index_, DrawInfoDic['cut'],
                        CuttedTree)
                else:
                    print '@ERROR: type should be either nominal, weight or shape. Aborting'
                    sys.exit()
            #copy type and histo
            DrawFormulaDic['type'] = DrawInfoDic['type']
            DrawFormulaDic['htree'] = DrawInfoDic['htree']

            DrawFormulaDicList.append(DrawFormulaDic)

        # Phase 4: single event loop filling every histogram.
        if CuttedTree.GetEntries() > 0:
            #! start the loop over variables (descriebed in options)
            # First_iter and nEntries are assigned but never read here.
            First_iter = True

            nEntries = CuttedTree.GetEntries()
            i = 0
            oldTreeNum = -1
            #for entry in range(0,nEntries):
            for event in CuttedTree:
                #if entry > 1000: break
                #print'================'
                #print 'entry is', entry
                #print'================'

                # i counts the entries in step with the iteration and is used
                # to detect when the chain moves on to another tree.
                CuttedTree.LoadTree(i)
                treeNum = CuttedTree.GetTreeNumber()
                #update the leaves
                if treeNum != oldTreeNum:
                    #nominal
                    DrawFormulaNom_var.UpdateFormulaLeaves()
                    DrawFormulaNom_cut.UpdateFormulaLeaves()
                    DrawFormulaNom_weight.UpdateFormulaLeaves()

                    for DrawFormulaDic in DrawFormulaDicList:
                        #update all the formula for the ploting
                        if DrawFormulaDic['type'] == 'nominal':
                            pass
                        elif DrawFormulaDic['type'] == 'weight':
                            DrawFormulaDic['weight'].UpdateFormulaLeaves()
                        elif DrawFormulaDic['type'] == 'shape':
                            DrawFormulaDic['var'].UpdateFormulaLeaves()
                            DrawFormulaDic['cut'].UpdateFormulaLeaves()
                    oldTreeNum = treeNum

                # GetNdata() is called on every formula before EvalInstance()
                # throughout this loop.
                DrawFormulaNom_var.GetNdata()
                DrawFormulaNom_cut.GetNdata()
                DrawFormulaNom_weight.GetNdata()

                var = DrawFormulaNom_var.EvalInstance()
                cut = DrawFormulaNom_cut.EvalInstance()
                weight = DrawFormulaNom_weight.EvalInstance()

                #print ''
                #print 'nominal outcomes are'
                #print 'var:', var
                #print 'cut:', cut
                #print 'weight:', weight
                #print ''
                #Fill Nominal Histo
                # The cut result multiplies the weight, and the first list
                # entry is always filled with the nominal values.
                DrawFormulaDicList[0]['htree'].Fill(var, weight * cut)

                var2 = None
                cut2 = None
                weight2 = None
                #loop over all different sys, fill the histos
                for DrawFormulaDic in DrawFormulaDicList:
                    #update all the formula for the ploting
                    if DrawFormulaDic['type'] == 'nominal':
                        continue
                    elif DrawFormulaDic['type'] == 'weight':
                        var2 = var
                        cut2 = cut
                        DrawFormulaDic['weight'].GetNdata()
                        weight2 = DrawFormulaDic['weight'].EvalInstance()
                    elif DrawFormulaDic['type'] == 'shape':
                        DrawFormulaDic['cut'].GetNdata()
                        DrawFormulaDic['var'].GetNdata()
                        var2 = DrawFormulaDic['var'].EvalInstance()
                        cut2 = DrawFormulaDic['cut'].EvalInstance()
                        weight2 = weight
                    DrawFormulaDic['htree'].Fill(var2, weight2 * cut2)

                i += 1
            #####
            #Rescaling the histograms

        # Phase 5: scale, fold the overflow bins, optionally rebin.
        # NOTE(review): treeVar, weightF, count and group below are the
        # values from the last iteration of the options loop, not the values
        # belonging to each histogram - confirm that this is intended.
        for DrawFormulaDic in DrawFormulaDicList:
            hTree = DrawFormulaDic['htree']
            if 'BDT' in treeVar or 'bdt' in treeVar or 'OPT' in treeVar:
                if TrainFlag:
                    if 'ZJets_amc' in job.name:
                        print 'No rescale applied for the sample', job.name
                        MC_rescale_factor = 1.
                    else:
                        MC_rescale_factor = 2.  ##FIXME## only dataset used for training must be rescaled!!
                else:
                    MC_rescale_factor = 1.

                # The digit before '+2' in the weight expression is passed on
                # to get_scale_LHE as its index argument.
                if 'LHE_weights_scale_wgt[0+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(
                        job, self.config, 0, self.lumi,
                        count) * MC_rescale_factor
                elif 'LHE_weights_scale_wgt[1+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(
                        job, self.config, 1, self.lumi,
                        count) * MC_rescale_factor
                elif 'LHE_weights_scale_wgt[2+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(
                        job, self.config, 2, self.lumi,
                        count) * MC_rescale_factor
                elif 'LHE_weights_scale_wgt[3+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(
                        job, self.config, 3, self.lumi,
                        count) * MC_rescale_factor
                else:
                    ScaleFactor = self.tc.get_scale(
                        job, self.config, self.lumi, count) * MC_rescale_factor
            else:
                if 'LHE_weights_scale_wgt[0+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(
                        job, self.config, 0, self.lumi, count)
                elif 'LHE_weights_scale_wgt[1+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(
                        job, self.config, 1, self.lumi, count)
                elif 'LHE_weights_scale_wgt[2+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(
                        job, self.config, 2, self.lumi, count)
                elif 'LHE_weights_scale_wgt[3+2]' in weightF:
                    ScaleFactor = self.tc.get_scale_LHE(
                        job, self.config, 3, self.lumi, count)
                else:
                    ScaleFactor = self.tc.get_scale(job, self.config,
                                                    self.lumi, count)
            if ScaleFactor != 0:
                hTree.Scale(ScaleFactor)
            integral = hTree.Integral()
            # !! Brute force correction for histograms with negative integral (problems with datacard) !!
            # Scaling by -0.001 flips the sign and shrinks the histogram.
            if integral < 0:
                hTree.Scale(-0.001)
            if addOverFlow:
                # Fold bin 0 into bin 1 and bin N+1 into bin N, adding the
                # errors in quadrature.
                uFlow = hTree.GetBinContent(0) + hTree.GetBinContent(1)
                oFlow = hTree.GetBinContent(hTree.GetNbinsX() +
                                            1) + hTree.GetBinContent(
                                                hTree.GetNbinsX())
                uFlowErr = ROOT.TMath.Sqrt(
                    ROOT.TMath.Power(hTree.GetBinError(0), 2) +
                    ROOT.TMath.Power(hTree.GetBinError(1), 2))
                oFlowErr = ROOT.TMath.Sqrt(
                    ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()), 2) +
                    ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX() +
                                                       1), 2))
                hTree.SetBinContent(1, uFlow)
                hTree.SetBinContent(hTree.GetNbinsX(), oFlow)
                hTree.SetBinError(1, uFlowErr)
                hTree.SetBinError(hTree.GetNbinsX(), oFlowErr)
            # Detach the histogram from the current ROOT directory.
            hTree.SetDirectory(0)
            gDict = {}
            if self._rebin:
                gDict[group] = self.mybinning.rebin(hTree)
                del hTree
            else:
                #print 'not rebinning %s'%job.name
                gDict[group] = hTree
#            print("STOP %s"%treeVar)
            hTreeList.append(gDict)
        First_iter = False
        #        print "get_histos_from_tree DONE for ",job.name, "var", options['var'], " in ", str(time.time() - start_time)," s."

        # Explicitly destroy the tree or chain on the ROOT side.
        if CuttedTree: CuttedTree.IsA().Destructor(CuttedTree)
        del CuttedTree
        return hTreeList

    def get_histos_from_tree(self,
                             job,
                             quick=True,
                             subcut_=None,
                             replacement_cut=None,
                             nomOnly=False):
        start_time = time.time()

        print "=============================================================\n"
        print "THE SAMPLE IS ", job.name
        print "=============================================================\n"

        #print "Begin to extract the histos from trees (get_histos_from_tree)"
        #print "=============================================================\n"

        if self.lumi == 0:
            lumi = self.config.get('Plot_general', 'lumi')
            #print("You're trying to plot with no lumi, I will use ",lumi)
            self.lumi = lumi

        hTreeList = []

        #get the conversion rate in case of BDT plots
        TrainFlag = eval(self.config.get('Analysis', 'TrainFlag'))

        # #Remove EventForTraining in order to run the MVA directly from the PREP step

        if not 'PSI' in self.config.get('Configuration', 'whereToLaunch'):
            #            BDT_add_cut='((evt%2) == 0 || Alt$(isData,0))'
            if 'ZJets_amc' in job.name:
                print 'No training cut for the sample', job.name
                BDT_add_cut = '1'
            else:
                BDT_add_cut = '((evt%2) == 0 || isData)'
        else:
            print 'Adding training cut'
            UseTrainSample = eval(self.config.get('Analysis',
                                                  'UseTrainSample'))
            if UseTrainSample:
                BDT_add_cut = '((evt%2) == 0 || isData)'
            else:
                if 'ZJets_amc' in job.name:
                    print 'No evt%2 cut for sample', job.name
                    BDT_add_cut = '1'
                else:
                    BDT_add_cut = '!((evt%2) == 0 || isData)'

        plot_path = self.config.get('Directories', 'plotpath')
        addOverFlow = eval(self.config.get('Plot_general', 'addOverFlow'))

        # get all Histos at once
        addCut = '1'  #'(%s)&&(%s)'%(self.tc.minCut, job.subcut) #debug!!
        print 'subcut_ is', subcut_
        if subcut_:
            addCut = subcut_
        #print 'addCut is', addCut

        # get the filenames for the root files to be read into the tree, and fill the count histograms
        rootFileNames = self.tc.get_tree(job, addCut)

        # in case of a list of files, read them as a TChain
        if type(rootFileNames) is list:
            CuttedTree = ROOT.TChain(job.tree)
            for rootFileName in rootFileNames:
                status = CuttedTree.Add(rootFileName + '/' + job.tree, 0)
                if status != 1:
                    print(
                        'ERROR: in HistoMaker.py, cannot add file to chain:' +
                        rootFileName)
            input = None
        # otherwise as a TFile for backwards compatibility
        else:
            input = ROOT.TFile.Open(rootFileNames, 'read')
            #Not: no subcut is needed since  done in caching
            #if job.subsample:
            #    addCut += '& (%s)' %(job.subcut)
            CuttedTree = input.Get(job.tree)
            CuttedTree.SetCacheSize(0)
        print 'CuttedTree.GetEntries()', CuttedTree.GetEntries()

        print "All .root MERGED for ", job.name, " in ", str(time.time() -
                                                             start_time), " s."

        #! start the loop over variables (descriebed in options)
        First_iter = True
        for options in self.optionsList:
            #print 'nomOnly is', nomOnly
            if self.optionsList.index(options) == 0:
                print 'This is the nominal histo, going to save him separatly'
            start_time = time.time()
            name = job.name
            if self.GroupDict is None:
                group = job.group
            else:
                group = self.GroupDict[job.name]
            treeVar = options['var']
            name = options['name']
            if self._rebin or self.calc_rebin_flag:
                nBins = self.nBins
            else:
                nBins = int(options['nBins'])
            xMin = float(options['xMin'])
            xMax = float(options['xMax'])
            weightF = options['weight']
            if 'SBweight' in options:
                SBweight = options['SBweight']
            else:
                SBweight = None
            #print 'SBweight is', SBweight

            #Include weight per sample (specialweight)
            if 'PSI' in self.config.get('Configuration', 'whereToLaunch'):
                weightF = "(" + weightF + ")"
                #weightF="("+weightF+")*(" + job.specialweight +")"
            else:
                weightF = "(" + weightF + ")"
                #weightF="("+weightF+")*(" + job.specialweight +")"

            if 'countHisto' in options.keys() and 'countbin' in options.keys():
                count = getattr(self.tc,
                                options['countHisto'])[options['countbin']]
            else:
                count = getattr(self.tc, "CountWeighted")[0]

            #if cutOverWrite:
            #    treeCut= str(1)
            #else:
            #    treeCut='%s'%(options['cut'])
            treeCut = '%s & %s' % (options['cut'], addCut)
            print "cut1:", options['cut']
            print "cut2:", addCut
            print "treecut:", treeCut
            if replacement_cut:
                if type(replacement_cut) is str:
                    treeCut = '%s & %s' % (replacement_cut, addCut)
                elif type(replacement_cut) is list:
                    treeCut = '%s & %s' % (replacement_cut[
                        (self.optionsList).index(options)], addCut)
                else:
                    print '@ERROR: replacement_cut is neither list or string. Aborting'
                    sys.exit()

            hTree = ROOT.TH1F('%s' % name, '%s' % name, nBins, xMin, xMax)

            #If you use extension only
            hTree.Sumw2()
            hTree.SetTitle(job.name)
            #print('hTree.name() 1 =',hTree.GetName())
            #print('treeVar 1 =',treeVar)
            drawoption = ''
            #print 'treeVar: %s'%(treeVar)
            #print 'weightF: %s'%(weightF)
            #print 'BDT_add_cut: %s'%(BDT_add_cut)
            #print 'treeCut: %s'%(treeCut)

            if job.type != 'DATA':
                if 'BDT' in treeVar or 'bdt' in treeVar or 'OPT' in treeVar:  #added OPT for BDT optimisation
                    drawoption = '(%s)*(%s & %s)' % (weightF, BDT_add_cut,
                                                     treeCut)
                    #print "I'm appling: ",BDT_add_cut
                    #drawoption = 'sign(genWeight)*(%s)*(%s & %s)'%(weightF,treeCut,BDT_add_cut)
                    #print drawoption
                else:
                    drawoption = '(%s)*(%s)' % (weightF, treeCut)
                #print ('Draw: %s>>%s' %(treeVar,name), drawoption, "goff,e")
                #print 'Are the drawoptions a string ?', isinstance(drawoption, basestring)
                #import pdb
                #if len(drawoption) > 650:
                #    pdb.set_trace()
                #nevents = CuttedTree.Draw('%s>>%s' %(treeVar,name), ROOT.TCut(drawoption), "goff,e")
                #treeVar = '1'
                #ROOT.gROOT.ProcessLine('.L /mnt/t3nfs01/data01/shome/gaperrin/VHbb/CMSSW_7_4_3/src/Xbb/python/myutils/TreeDraw.C')
                #from ROOT import TreeDraw
                #TD = ROOT.treedraw()
                #print 'drawoptions are', drawoption
                #Make sure sample used for sample systematics are used/skiped

                sample_sys_dic = options[
                    'sample_sys_dic'] if 'sample_sys_dic' in options else {}

                if job.name in sample_sys_dic and not sample_sys_dic[job.name]:
                    print 'sample', job.name, ' will not be ploted'
                    print 'job.name is', job.name
                    print 'sample_sys_dic is', sample_sys_dic
                else:
                    print 'sample', job.name, ' will be ploted'
                    #hTree = TD.TreeDraw(CuttedTree, hTree, '%s>>%s' %(treeVar,name), drawoption)
                    #nevents = CuttedTree.Draw('%s>>%s' %(treeVar,name), str(drawoption), "goff,e")
                    nevents = CuttedTree.Draw('%s>>%s' % (treeVar, name),
                                              str(drawoption), "goff,e")
                #if First_iter: print 'Number of events are', nevents
                #print 'nevents:',hTree.GetEntries(),' hTree.name() 2 =',hTree.GetName()
                full = True
            elif job.type == 'DATA':

                if not SBweight == None:
                    treeCutData = '(' + treeCut + ')*(' + SBweight + ')'
                else:
                    treeCutData = treeCut
                print 'treeCutData is', treeCutData

                if options['blind']:
                    lowLimitBlindingMass = 90
                    highLimitBlindingMass = 140
                    lowLimitBlindingBDT = 0.4
                    lowLimitBlindingDR = 0.8
                    highLimitBlindingDR = 1.6

                    if 'mass' in treeVar:
                        print '@ERROR: removed cut on the mass'
                        sys.exit()
                        lowLimitBlindingMass = hTree.GetBinLowEdge(
                            hTree.FindBin(lowLimitBlindingMass))
                        highLimitBlindingMass = hTree.GetBinLowEdge(
                            hTree.FindBin(
                                highLimitBlindingMass)) + hTree.GetBinWidth(
                                    hTree.GetBin(highLimitBlindingMass))
                        veto = ("(%s <%s || %s > %s)" %
                                (treeVar, lowLimitBlindingMass, treeVar,
                                 highLimitBlindingMass))
                        CuttedTree.Draw('%s>>%s' % (treeVar, name),
                                        veto + '&' + ' %(cut)s' % options,
                                        "goff,e")
                    elif 'BDT' in treeVar or 'bdt' in treeVar or 'nominal' in treeVar in treeVar:
                        lowLimitBlindingBDT = hTree.GetBinLowEdge(
                            hTree.FindBin(lowLimitBlindingBDT))
                        veto = "(%s <%s)" % (treeVar, lowLimitBlindingBDT)
                        print 'I will add the veto', veto
                        CuttedTree.Draw('%s>>%s' % (treeVar, name),
                                        veto + '&' + ' %(cut)s' % options,
                                        "goff,e")
                    elif 'dR' in treeVar and 'H' in treeVar:
                        lowLimit = hTree.GetBinLowEdge(
                            hTree.FindBin(lowLimitBlindingDR))
                        highLimit = hTree.GetBinLowEdge(
                            hTree.FindBin(highLimitBlindingDR))
                        veto = ("(%s <%s || %s > %s)" %
                                (treeVar, lowLimitBlindingMass, treeVar,
                                 highLimitBlindingMass))
                        CuttedTree.Draw('%s>>%s' % (treeVar, name),
                                        veto + '&' + ' %(cut)s' % options,
                                        "goff,e")
                    else:
                        CuttedTree.Draw('%s>>%s' % (treeVar, name),
                                        '%s' % treeCutData, "goff,e")
                else:
                    CuttedTree.Draw('%s>>%s' % (treeVar, name),
                                    '%s' % treeCutData, "goff,e")
                full = True
            # if full:
            # hTree = ROOT.gDirectory.Get(name)
            # print('histo1',ROOT.gDirectory.Get(name))
            # else:
            # hTree = ROOT.TH1F('%s'%name,'%s'%name,nBins,xMin,xMax)
            # hTree.Sumw2()
            # print('histo2',ROOT.gDirectory.Get(name))
#            print("END DRAWING")
#            print("START RESCALE")
# if full: print 'hTree',hTree.GetName()

            if job.type != 'DATA':
                if 'BDT' in treeVar or 'bdt' in treeVar or 'OPT' in treeVar:
                    if TrainFlag:
                        if 'ZJets_amc' in job.name:
                            print 'No rescale applied for the sample', job.name
                            MC_rescale_factor = 1.
                        else:
                            MC_rescale_factor = 2.  ##FIXME## only dataset used for training must be rescaled!!
                    else:
                        MC_rescale_factor = 1.

                    if 'LHE_weights_scale_wgt[0+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(
                            job, self.config, 0, self.lumi,
                            count) * MC_rescale_factor
                    elif 'LHE_weights_scale_wgt[1+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(
                            job, self.config, 1, self.lumi,
                            count) * MC_rescale_factor
                    elif 'LHE_weights_scale_wgt[2+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(
                            job, self.config, 2, self.lumi,
                            count) * MC_rescale_factor
                    elif 'LHE_weights_scale_wgt[3+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(
                            job, self.config, 3, self.lumi,
                            count) * MC_rescale_factor
                    else:
                        ScaleFactor = self.tc.get_scale(
                            job, self.config, self.lumi,
                            count) * MC_rescale_factor
                else:
                    if 'LHE_weights_scale_wgt[0+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(
                            job, self.config, 0, self.lumi, count)
                    elif 'LHE_weights_scale_wgt[1+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(
                            job, self.config, 1, self.lumi, count)
                    elif 'LHE_weights_scale_wgt[2+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(
                            job, self.config, 2, self.lumi, count)
                    elif 'LHE_weights_scale_wgt[3+2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHE(
                            job, self.config, 3, self.lumi, count)
                    else:
                        ScaleFactor = self.tc.get_scale(
                            job, self.config, self.lumi, count)
                if ScaleFactor != 0:
                    hTree.Scale(ScaleFactor)
                integral = hTree.Integral()
                #print '\t-->import %s\t Integral: %s'%(job.name,integral)
                #print("job:",job.name," ScaleFactor=",ScaleFactor)
                #print("END RESCALE")
                #print("START addOverFlow")
                # !! Brute force correction for histograms with negative integral (problems with datacard) !!
                if integral < 0:
                    hTree.Scale(-0.001)
                    #print "#"*30
                    #print "#"*30
                    #print "original integral was:",integral
                    #print "now is:", hTree.Integral()
                    #print "#"*30
                    #print "#"*30
            if addOverFlow:
                uFlow = hTree.GetBinContent(0) + hTree.GetBinContent(1)
                oFlow = hTree.GetBinContent(hTree.GetNbinsX() +
                                            1) + hTree.GetBinContent(
                                                hTree.GetNbinsX())
                uFlowErr = ROOT.TMath.Sqrt(
                    ROOT.TMath.Power(hTree.GetBinError(0), 2) +
                    ROOT.TMath.Power(hTree.GetBinError(1), 2))
                oFlowErr = ROOT.TMath.Sqrt(
                    ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()), 2) +
                    ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX() +
                                                       1), 2))
                hTree.SetBinContent(1, uFlow)
                hTree.SetBinContent(hTree.GetNbinsX(), oFlow)
                hTree.SetBinError(1, uFlowErr)
                hTree.SetBinError(hTree.GetNbinsX(), oFlowErr)
            hTree.SetDirectory(0)
            gDict = {}
            if self._rebin:
                gDict[group] = self.mybinning.rebin(hTree)
                del hTree
            else:
                print 'not rebinning %s' % job.name
                gDict[group] = hTree


#            print("STOP %s"%treeVar)
            hTreeList.append(gDict)
            First_iter = False
            #print "get_histos_from_tree DONE for ",job.name, "var", options['var'], " in ", str(time.time() - start_time)," s."
            #if self.optionsList.index(options) == 0:
            #    c = ROOT.TCanvas('c','c')
            #    c.cd()
            #    hTree.Draw()
            #    c.SaveAs('/mnt/t3nfs01/data01/shome/gaperrin/VHbb/CMSSW_7_4_3/src/Xbb/python/TESTDC/'+group+hTree.GetName()+job.name+'.root')
            #    c.SaveAs('/mnt/t3nfs01/data01/shome/gaperrin/VHbb/CMSSW_7_4_3/src/Xbb/python/TESTDC/'+group+hTree.GetName()+job.name+'.C')
            #    break
            if nomOnly: break

        if CuttedTree: CuttedTree.IsA().Destructor(CuttedTree)
        del CuttedTree
        #print "Finished to extract the histos from trees (get_histos_from_tree)"
        #print "================================================================\n"
        #print "get_histos_from_tree DONE for ",job.name," in ", str(time.time() - start_time)," s."
        return hTreeList

    @property
    def rebin(self):
        """Return whether histograms are currently being rebinned."""
        return self._rebin

    @rebin.setter
    def rebin(self, value):
        """Switch rebinning on or off.

        Turning it on requires that a binning has been defined
        (``self.mybinning``) and selects ``self.rebin_nBins``; turning it off
        restores ``self.norebin_nBins``.  Assigning the current state is a
        no-op.

        Raises:
            Exception: when rebinning is requested but no binning exists.

        NOTE(review): in Python 2 property setters are only honoured on
        new-style classes -- confirm the enclosing class derives from
        ``object``.
        """
        if value and not self._rebin:
            if self.mybinning is None:
                raise Exception('define rebinning first')
            self.nBins = self.rebin_nBins
            self._rebin = True
        elif self._rebin and not value:
            self.nBins = self.norebin_nBins
            self._rebin = False

    def calc_rebin(self,
                   bg_list,
                   nBins_start=1000,
                   tolerance=0.35,
                   dc_step=False):
        #print "START calc_rebin"
        self.calc_rebin_flag = True
        self.norebin_nBins = copy(self.nBins)
        self.rebin_nBins = nBins_start
        self.nBins = nBins_start
        i = 0
        #add all together:
        print '\n\t...calculating rebinning...'
        for job in bg_list:
            #print "job",job
            if dc_step:
                #htree = self.get_histos_from_tree_dc(job)[0].values()[0]
                #self,job,quick=True, subcut_ = None, replacement_cut = None, nomOnly = False
                #htree = self.get_histos_from_tree(job)[0].values()[0]
                #htree = self.get_histos_from_tree_dc(job, True, None, None, True)[0].values()[0]
                htree = self.get_histos_from_tree(job, True, None, None,
                                                  True)[0].values()[0]
            else:
                #self,job,quick=True, subcut_ = None, replacement_cut = None, nomOnly = False
                #htree = self.get_histos_from_tree(job)[0].values()[0]
                #htree = self.get_histos_from_tree()[0].values()[0]
                htree = self.get_histos_from_tree(job, True, None, None,
                                                  True)[0].values()[0]
            print "Integral", job, htree.Integral()
            if not i:
                totalBG = copy(htree)
            else:
                totalBG.Add(htree, 1)
            del htree
            i += 1
        ErrorR = 0
        ErrorL = 0
        TotR = 0
        TotL = 0
        binR = self.rebin_nBins
        binL = 1
        rel = 1.0
        print "START loop from right"
        #print "totalBG.Draw("","")",totalBG.Integral()
        #---- from right
        while rel > tolerance:
            TotR += totalBG.GetBinContent(binR)
            ErrorR = sqrt(ErrorR**2 + totalBG.GetBinError(binR)**2)
            binR -= 1
            if binR < 0: break
            if TotR < 1.: continue
            print 'binR is', binR
            print 'TotR is', TotR
            print 'ErrorR is', ErrorR
            if not TotR <= 0 and not ErrorR == 0:
                rel = ErrorR / TotR
                print 'rel is', rel
        print 'upper bin is %s' % binR
        print "END loop from right"

        #---- from left

        rel = 1.0
        print "START loop from left"
        while rel > tolerance:
            TotL += totalBG.GetBinContent(binL)
            ErrorL = sqrt(ErrorL**2 + totalBG.GetBinError(binL)**2)
            binL += 1
            if binL > nBins_start: break
            if TotL < 1.: continue
            if not TotL <= 0 and not ErrorL == 0:
                rel = ErrorL / TotL
                print rel
        #it's the lower edge
        print "STOP loop from left"
        binL += 1
        print 'lower bin is %s' % binL

        inbetween = binR - binL
        stepsize = int(inbetween) / (int(self.norebin_nBins) - 2)
        modulo = int(inbetween) % (int(self.norebin_nBins) - 2)

        print 'stepsize %s' % stepsize
        print 'modulo %s' % modulo
        binlist = [binL]
        for i in range(0, int(self.norebin_nBins) - 3):
            binlist.append(binlist[-1] + stepsize)
        binlist[-1] += modulo
        binlist.append(binR)
        binlist.append(self.rebin_nBins + 1)
        print 'binning set to %s' % binlist
        #print "START REBINNER"
        #if not self.Custom_BDT_bins:
        #    self.mybinning = Rebinner(int(self.norebin_nBins),array('d',[-1.0]+[totalBG.GetBinLowEdge(i) for i in binlist]),True)
        #else:
        if self.BDTmin:
            if not type(self.BDTmin) is list: self.BDTmin = [self.BDTmin]
            #default from the rebinner
            default_rebin = array('d', [-1.0] +
                                  [totalBG.GetBinLowEdge(i) for i in binlist])
            #cutsom lower bins
            custom_rebin = array('d', self.BDTmin)
            #now making event bins between rightmost bin and cutom bin i.e. [self.BDTmin, event bins, last bin]
            print 'The original number of bins is', len(default_rebin) - 1
            nBetween_bins = (len(default_rebin) - 1) - 1 - (len(self.BDTmin) -
                                                            1)
            if default_rebin[-2] < self.BDTmin[-1]:
                print '@ERROR: the custom lower BDT range contains lower edge of rightmost bin. Please change the range definiton. Aborting'
                sys.exit()
            custom_step = (default_rebin[-2] -
                           self.BDTmin[-1]) / (nBetween_bins * 1.0)
            for b in range(1, nBetween_bins):
                custom_rebin.append(self.BDTmin[-1] + b * custom_step)
            custom_rebin.append(default_rebin[-2])
            custom_rebin.append(1)
            print 'custom_rebin is', custom_rebin
            print 'the final number of bins is', len(custom_rebin) - 1
            self.mybinning = Rebinner(
                len(custom_rebin) - 1, custom_rebin, True, True)
            #else:
            #if type(self.BDTmin) is list:
            #    #default from the rebinner
            #    default_rebin = array('d',[-1.0]+[totalBG.GetBinLowEdge(i) for i in binlist])
            #    #cutsom lower bins
            #    custom_rebin = array('d',self.BDTmin)

            #    #now making event bins between rightmost bin and cutom bin i.e. [self.BDTmin, event bins, last bin]
            #    nBetween_bins = self.norebin_nBins - 1 - len(self.BDTmin)
            #    custom_step = (default_rebin[-1] - self.BDTmin[-1])/(nBetween_bins*1.0)
            #    for b in range(1,nBetween_bins):
            #        custom_rebin.append(self.BDTmin[-1] + b*custom_step)
            #    custom_rebin.append(default_rebin[-1])
            #    custom_rebin.append(1)
            #    #for b in default_rebin:
            #    #    if b <= self.BDTmin[-1]: continue
            #    #    custom_rebin.append(b)
            #else:
            #    default_rebin = array('d',[-1.0]+[totalBG.GetBinLowEdge(i) for i in binlist])
            #    custom_rebin = array('d',[self.BDTmin])
            #    for b in default_rebin:
            #        if b <= self.BDTmin: continue
            #        custom_rebin.append(b)
            #    self.mybinning = Rebinner(len(custom_rebin) -1 ,custom_rebin),True,self.BDTmin)
        else:
            self.mybinning = Rebinner(
                int(self.norebin_nBins),
                array('d',
                      [-1.0] + [totalBG.GetBinLowEdge(i) for i in binlist]),
                True)
            #self.mybinning = Rebinner(int(self.norebin_nBins),array('d',[-0.8]+[totalBG.GetBinLowEdge(i) for i in binlist]),True)
        #self.mybinning = Rebinner(int(self.norebin_nBins),array('d',[0.]+[totalBG.GetBinLowEdge(i) for i in binlist]),True)
        self._rebin = True
        print '\t > rebinning is set <\n'

    @staticmethod
    def orderandadd(histo_dicts, setup):
        '''
        Setup is defined in the plot conf file
        histo_dicts contains an array of dictionnary
        '''

        from array import array
        doubleVariable = array('d', [0])

        #print "Start orderandadd"
        #print "=================\n"
        #print "Input dict is", histo_dicts

        ordered_histo_dict = {}
        #print "orderandadd-setup",setup
        #print "orderandadd-histo_dicts",histo_dicts
        for sample in setup:
            sumintegral = 0
            nSample = 0
            for histo_dict in histo_dicts:
                if histo_dict.has_key(sample):
                    integral = histo_dict[sample].IntegralAndError(
                        0, histo_dict[sample].GetNbinsX(), doubleVariable)
                    error = doubleVariable[0]
                    entries = histo_dict[sample].GetEntries()
                    subsamplename = histo_dict[sample].GetTitle()
                    if nSample == 0:
                        ordered_histo_dict[sample] = histo_dict[sample].Clone()
                    else:
                        ordered_histo_dict[sample].Add(histo_dict[sample])
                    printc(
                        'magenta', '',
                        '\t--> added %s to %s Integral: %s Entries: %s Error: %s'
                        % (subsamplename, sample, integral, entries, error))
                    sumintegral += integral
                    nSample += 1
            #print 'The final integral is %s' % sumintegral
        del histo_dicts
        #print "Output dict is", ordered_histo_dict
        return ordered_histo_dict
示例#16
0
class HistoMaker:
    def __init__(self, samples, path, config, optionsList,GroupDict=None):
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi=0.
        self.cuts = []
        self.weight = []

        for options in optionsList:
            self.cuts.append(options['cut'])
            self.weight.append(options['weight'])
            
        print '  with Cuts  : ', self.cuts[0]
        print '  and Weights: ', self.weight[0]

        self.tc = TreeCache(self.cuts, samples, path, config)

        self._rebin = False
        self.mybinning = None
        self.GroupDict=GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace=config.get('VHbbNameSpace','library')
        ROOT.gSystem.Load(VHbbNameSpace)

        

    def get_histos_from_tree(self,job,cutOverWrite=None):

        #if self.lumi == 0: 
        #    raise Exception("You're trying to plot with no lumi")
         
        hTreeList=[]

        #get the conversion rate in case of BDT plots
        TrainFlag = eval(self.config.get('Analysis','TrainFlag'))
        if TrainFlag:
            BDT_test_cut = 'evt%2!=0'
            print '\n======== Filling Datacard with BDT Test Events =========='
        else: 
            BDT_test_cut = 'evt%2==0'
            print '\n======== Filling Datacard with BDT Train Events =========='

        plot_path   = self.config.get('Directories','plotpath')
        addOverFlow = eval(self.config.get('Plot_general','addOverFlow'))
        
        # Get the tree for this sample(not actually cut yet)
        CuttedTree = self.tc.get_tree(job,'1')
        
        # Get the lumiweighted cross section and genWeight from the tree
        if job.type != 'DATA':
            xSec = self.tc.get_scale(job, self.config)
            
            
        if job.type == 'DATA':
            xSec = 1
            
        #print '-----> Job Name, Type: ', job.name, job.type
        #print '       xSec: ', xSec
        #print '       Tree: ', CuttedTree
        #print '    Entries: ', CuttedTree.GetEntries()
        
        eval(self.config.get('Analysis','TrainFlag'))                        
        for options in self.optionsList:

            name = job.name

            if self.GroupDict is None:
                group=job.group
            else:
                group=self.GroupDict[job.name]
                
            # Temp hack to make datacrads with MC
            #if job.name == 'Zee' or job.name == 'Zuu' or job.name == 'Zll':
            #    job.type = 'DATA'
                
            treeVar = options['var']
            name    = options['name']
            #print options
            #print 'Type: ', job.type
            #print 'Name: ', name
            
            if self._rebin or self.calc_rebin_flag:
                nBins = self.nBins
            else:
                nBins = int(options['nBins'])

            xMin = float(options['xMin'])
            xMax = float(options['xMax'])

            # WARNING: This piece of code outdated.  Currently runs with weightF = 1.  Scaling applied on line 145
            #if job.name == 'Zcudsg' or job.name == 'Zcc' or job.name == 'Z1b' or job.name == 'Z2b': 
            #    weightF = 1
            #else: weightF = 1
                        
            if cutOverWrite:
                treeCut=cutOverWrite
            else:
                treeCut='%s'%(options['cut'])
                
            weightF = '%s'%(options['weight'])    
            
            # DY stitching
            #if not job.specialweight: job.specialweight = "1"
            #print 'specialweight:', job.specialweight
            #job.specialweight = "1"
            #weightF = "("+weightF+")*("+job.specialweight+")"
            
            
            print '\n-----> Making histograms for variable:', treeVar
              
            if job.type != 'DATA':
                
                # Set the Pile-up weight
                #weightF = self.config.get('Weights','weightF')
                #print 'weightF: ', weightF
                
                if CuttedTree.GetEntries():
                    
                    #if 'trainBDT' in treevar:
                    #    drawoption = '(%s)*(%s & %s)'%(weightF,treeCut,BDT_train_cut)
                    
                    if 'gg_plus' in treeVar or 'VV_bdt' in treeVar: 
                        bdt_xSec = xSec*2
                        print '\n----> Filling Histogram with BDT Test Events...'
                        drawoption = '(sign(genWeight))*(%s)*(%s & %s)*(%s)'%(weightF, treeCut, BDT_test_cut, bdt_xSec)
                    
                    elif 'vtx' in treeVar or 'EmEF' in treeVar:
                        drawoption = '(sign(genWeight))*(%s)*(%s)*(%s)' % (weightF,treeCut+' && '+treeVar+' > 0',xSec)
    
                    else: 
                        drawoption = '(sign(genWeight))*(%s)*(%s)*(%s)' % (weightF,treeCut,xSec)
                        
                
                    #drawoption = '(sign(genWeight))*(%s)*(%s)*(%s)' % (weightF,treeCut,xSec)
                    #print'\t ----> with Draw CutString: ', drawoption 
                        
                    CuttedTree.Draw('%s>>%s(%s,%s,%s)' %(treeVar,job.name,nBins,xMin,xMax), drawoption, "goff,e")
                    

                    
                    full = True

                else:
                    full=False

            elif job.type == 'DATA':

                print '\n----> Job Type: Data...'
                print '            Name: ', job.name

                print '\n---->Drawing Tree for variable: ',treeVar
                print '  with Cuts: ', treeCut
                
                
                if 'gg_plus' in treeVar:
                    if options['blind']:
                        treeCut = treeCut + ' & '+treeVar+'<0.2'
                        print '\n\n====== BLINDED ======'
                        print treeCut
                        CuttedTree.Draw('%s>>%s(%s,%s,%s)' %(treeVar,job.name,nBins,xMin,xMax), '%s' %treeCut, "goff,e")
                    else:
                        CuttedTree.Draw('%s>>%s(%s,%s,%s)' %(treeVar,job.name,nBins,xMin,xMax), '%s' %treeCut, "goff,e")

                elif 'HCSV_reg_mass' in treeVar:
                    if options['blind']:
                        treeCut = treeCut + ' & HCSV_reg_mass < 90. & HCSV_reg_mass > 145.'
                        print '\n\n====== BLINDED ======'
                        print treeCut
                        CuttedTree.Draw('%s>>%s(%s,%s,%s)' %(treeVar,job.name,nBins,xMin,xMax), '%s' %treeCut, "goff,e")
                    else:
                        CuttedTree.Draw('%s>>%s(%s,%s,%s)' %(treeVar,job.name,nBins,xMin,xMax), '%s' %treeCut, "goff,e")
                    
                        
                elif '_corr' in treeVar:
                    print 'VAR:',treeVar
                    if '[0]' in treeVar:
                        new_treeVar = 'Jet_pt[hJCidx[0]]/Jet_rawPt[hJCidx[0]]'
                    if '[1]' in treeVar:
                        new_treeVar = 'Jet_pt[hJCidx[1]]/Jet_rawPt[hJCidx[1]]'
                    CuttedTree.Draw('%s>>%s(%s,%s,%s)' %(new_treeVar,job.name,nBins,xMin,xMax), '%s' %treeCut, "goff,e")
                    

                elif 'vtx' in treeVar or 'EmEF' in treeVar:
                    CuttedTree.Draw('%s>>%s(%s,%s,%s)' %(treeVar,job.name,nBins,xMin,xMax), '%s' %treeCut+' && '+treeVar+' > 0', "goff,e")
                    
                
                else:
                    if options['blind']:
                        CuttedTree.Draw('%s>>%s(%s,%s,%s)' %(treeVar,job.name,nBins,xMin,xMax), '%s & V_pt < 0.' %treeCut, "goff,e")
                    else:
                        print '!!!!NOT BLINDING!!!'
                        CuttedTree.Draw('%s>>%s(%s,%s,%s)' %(treeVar,job.name,nBins,xMin,xMax), '%s' %treeCut, "goff,e")
                        


                print CuttedTree
                
                '''
                hReg_metric = hReg.GetRMS()/hReg.GetMean()
                hNom_metric = hNom.GetRMS()/hNom.GetMean()
                percent_improvement = (1-(hReg_metric/hNom_metric))*100
                hReg_std = str(round(hReg.GetRMS(),3))
                hReg_mu  = str(round(hReg.GetMean(),3))
                hNom_std = str(round(hNom.GetRMS(),3))
                hNom_mu  = str(round(hNom.GetMean(),3))
                '''

                full = True

            if full:
                hTree = ROOT.gDirectory.Get(job.name)
                print '\nJob name: ',job.name
                print 'hTree: ', hTree
                
                # Get Stats
                hTree.GetRMS()
                hTree.GetMean()
                print '\t Mean: ', hTree.GetMean()
                print '\t RMS : ', hTree.GetRMS()
                

            else:
                hTree = ROOT.TH1F('%s'%name,'%s'%name,nBins,xMin,xMax)
                hTree.Sumw2()
                
            
            # NOW scale the histograms    
            if job.type != 'DATA':

                if 'gg_plus' in treeVar or 'trainBDT' in treeVar:
                    if TrainFlag:
                        MC_rescale_factor=2.
                        print 'I RESCALE BY 2.0'
                    else: 
                        MC_rescale_factor = 1.
    
                    ScaleFactor = self.tc.get_scale(job,self.config)*MC_rescale_factor
                    print '\n-----> Histogram Scale Factor: ', ScaleFactor
                else: 
                    ScaleFactor = self.tc.get_scale(job, self.config)
                    #ScaleFactor = self.tc.get_scale(job,self.config,self.lumi)
                    print '\n-----> Histogram Scale Factor: ', ScaleFactor
                #if ScaleFactor != 0:
                #    hTree.Scale(ScaleFactor)
                    
            #print '\t-->import %s\t Integral: %s'%(job.name,hTree.Integral())
            if addOverFlow:
            	uFlow = hTree.GetBinContent(0)+hTree.GetBinContent(1)
            	oFlow = hTree.GetBinContent(hTree.GetNbinsX()+1)+hTree.GetBinContent(hTree.GetNbinsX())
            	uFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(0),2)+ROOT.TMath.Power(hTree.GetBinError(1),2))
            	oFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()),2)+ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()+1),2))
            	hTree.SetBinContent(1,uFlow)
            	hTree.SetBinContent(hTree.GetNbinsX(),oFlow)
            	hTree.SetBinError(1,uFlowErr)
            	hTree.SetBinError(hTree.GetNbinsX(),oFlowErr)

            hTree.SetDirectory(0)
            gDict = {}

            if self._rebin:
                gDict[group] = self.mybinning.rebin(hTree)
                del hTree
            else: 
                #print 'not rebinning %s'%job.name 
                gDict[group] = hTree

            hTreeList.append(gDict)

        CuttedTree.IsA().Destructor(CuttedTree)
        del CuttedTree
        return hTreeList


       
    @property
    def rebin(self):
        """Current state of the rebinning switch (``self._rebin``)."""
        return self._rebin

    @rebin.setter
    def rebin(self, value):
        """Switch rebinning on or off.

        Enabling makes ``self.rebin_nBins`` the working bin count and
        requires a binning object in ``self.mybinning``; disabling restores
        ``self.norebin_nBins``.  Assigning the state that is already active
        changes nothing.

        Raises:
            Exception: when enabling while ``self.mybinning`` is None.
        """
        # NOTE(review): under Python 2 a property setter only fires on
        # new-style classes -- confirm the enclosing class derives from
        # ``object``.
        enable = bool(value)
        if enable == bool(self._rebin):
            # Already in the requested state.
            return
        if enable:
            if self.mybinning is None:
                raise Exception('define rebinning first')
            self.nBins = self.rebin_nBins
            self._rebin = True
        else:
            self.nBins = self.norebin_nBins
            self._rebin = False

    def calc_rebin(self, bg_list, nBins_start=1000, tolerance=0.25):
        """Derive a variable binning from the summed backgrounds and enable it.

        Every job in ``bg_list`` is histogrammed with ``nBins_start`` fine
        bins (first option set only) and the results are summed.  Fine bins
        are then accumulated from the last bin downwards, and from the first
        bin upwards, until the relative uncertainty (error / content) of the
        accumulated sum is no larger than ``tolerance``; this fixes the two
        outer boundaries.  The fine bins in between are split into
        ``norebin_nBins - 2`` groups of equal size, with the remainder added
        to one of them.  The low edges of the chosen bins are passed to
        ``Rebinner``; the result is stored in ``self.mybinning`` and
        rebinning is switched on.

        Args:
            bg_list: iterable of background jobs accepted by
                ``get_histos_from_tree``.
            nBins_start: number of fine bins used while deriving the binning.
            tolerance: largest accepted relative uncertainty of an outer bin.

        Raises:
            ValueError: if ``bg_list`` yields no job.
            RuntimeError: if a scan uses up every fine bin without reaching
                ``tolerance`` (previously this ran off the histogram and
                could loop forever on an empty one).
        """
        self.calc_rebin_flag = True
        self.norebin_nBins = copy(self.nBins)
        self.rebin_nBins = nBins_start
        self.nBins = nBins_start

        # add all together:
        print('\n\t...calculating rebinning...')

        totalBG = None
        for job in bg_list:
            # First option set, single {group: histogram} entry.
            htree = list(self.get_histos_from_tree(job)[0].values())[0]
            if totalBG is None:
                totalBG = copy(htree)
            else:
                totalBG.Add(htree, 1)
            del htree
        if totalBG is None:
            raise ValueError('calc_rebin needs at least one background job')

        def _scan(first_bin, step):
            """Accumulate bins from first_bin in direction step until the
            relative uncertainty is within tolerance; return the index of the
            first bin that was not visited."""
            total = 0
            error = 0
            rel = 1.0
            current = first_bin
            while rel > tolerance:
                if not 1 <= current <= self.rebin_nBins:
                    raise RuntimeError(
                        'calc_rebin: tolerance %s not reached within %s bins'
                        % (tolerance, self.rebin_nBins))
                total += totalBG.GetBinContent(current)
                error = sqrt(error**2 + totalBG.GetBinError(current)**2)
                current += step
                # rel is only refreshed once the sum and its error are nonzero
                if not total == 0 and not error == 0:
                    rel = error/total
            return current

        #---- from right
        binR = _scan(self.rebin_nBins, -1)
        #---- from left; the extra +1 turns the index into the lower edge
        binL = _scan(1, 1) + 1

        inbetween = binR - binL
        n_inner = int(self.norebin_nBins) - 2
        # Floor division spelled out so the result stays an integer bin index.
        stepsize = int(inbetween)//n_inner
        modulo = int(inbetween) % n_inner

        binlist = [binL]
        for _ in range(n_inner - 1):
            binlist.append(binlist[-1] + stepsize)
        binlist[-1] += modulo
        binlist.append(binR)
        binlist.append(self.rebin_nBins + 1)

        # NOTE(review): the leading -1.0 edge presumably matches the lower
        # bound of the plotted variable -- confirm against Rebinner's users.
        edges = [-1.0] + [totalBG.GetBinLowEdge(b) for b in binlist]
        self.mybinning = Rebinner(int(self.norebin_nBins), array('d', edges), True)
        self._rebin = True
        print('\t > rebinning is set <\n')

    @staticmethod
    def orderandadd(histo_dicts,setup):
        """Sum histograms that share a sample name.

        Args:
            histo_dicts: re-iterable collection of ``{sample: histogram}``
                dicts; histograms must provide ``Clone()`` and ``Add(other)``.
            setup: iterable of sample names to collect.

        Returns:
            A dict mapping each name in ``setup`` that occurs in at least one
            input dict to a new histogram: a clone of the first occurrence
            with every later occurrence added.  The inputs are not modified.
        """
        ordered_histo_dict = {}
        for sample in setup:
            merged = None
            for histo_dict in histo_dicts:
                # ``in`` replaces dict.has_key, which Python 3 removed.
                if sample not in histo_dict:
                    continue
                if merged is None:
                    merged = histo_dict[sample].Clone()
                else:
                    #printc('magenta','','\t--> added %s to %s'%(sample,sample))
                    merged.Add(histo_dict[sample])
            if merged is not None:
                ordered_histo_dict[sample] = merged
        return ordered_histo_dict
示例#17
0
class HistoMaker(object):
    """Fill per-sample ROOT histograms from cached trees and combine them.

    One histogram is produced per entry of ``optionsList``; the keys read
    from each entry are 'var', 'name', 'nBins', 'xMin', 'xMax', 'weight',
    'cut' and (for data) 'blind'.  A variable binning can be derived with
    ``calc_rebin`` and toggled through the ``rebin`` property.

    The class derives from ``object`` so that the ``rebin`` setter also
    works under Python 2, where properties need a new-style class.
    """

    def __init__(self, samples, path, config, optionsList, GroupDict=None):
        """Store the plot configuration and create the tree cache.

        Args:
            samples: sample collection, passed through to ``TreeCache``.
            path: sample location, passed through to ``TreeCache``.
            config: configuration object; only ``get(section, option)`` is
                used by this class.
            optionsList: non-empty sequence of per-histogram option dicts.
            GroupDict: optional mapping from job name to group label; when
                None each job's own ``group`` attribute is used.
        """
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi = 0.
        self.cuts = [options['cut'] for options in optionsList]
        print("Cuts: %s" % (self.cuts,))
        self.tc = TreeCache(self.cuts, samples, path, config)
        self._rebin = False
        self.mybinning = None
        self.GroupDict = GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

    def get_histos_from_tree(self, job, cutOverWrite=None, quick=True):
        """Fill one histogram per option set for ``job``.

        Args:
            job: sample descriptor; ``name``, ``type`` and ``group`` are read.
            cutOverWrite: optional selection string used in place of each
                option set's 'cut'.  Blinded data draws format the option
                set's own 'cut' and therefore ignore it.
            quick: accepted for interface compatibility; not used here.

        Returns:
            A list with one single-entry dict ``{group: histogram}`` per
            element of ``self.optionsList``, in the same order.
        """
        print("get_histos_from_tree START for  %s" % (job.name,))
        if self.lumi == 0:
            lumi = self.config.get('Plot_general', 'lumi')
            print("You're trying to plot with no lumi, I will use %s" % (lumi,))
            self.lumi = lumi

        hTreeList = []

        # NOTE(review): eval() executes whatever text the config holds; only
        # safe while the configuration files are trusted.
        TrainFlag = eval(self.config.get('Analysis', 'TrainFlag'))
        BDT_add_cut = 'EventForTraining == 0'
        addOverFlow = eval(self.config.get('Plot_general', 'addOverFlow'))

        # get all Histos at once
        CuttedTree = self.tc.get_tree(job, '1')
        for options in self.optionsList:
            if self.GroupDict is None:
                group = job.group
            else:
                group = self.GroupDict[job.name]
            treeVar = options['var']
            name = options['name']
            # While a rebinning is active or being derived, the object-wide
            # bin count wins over the per-plot one.
            if self._rebin or self.calc_rebin_flag:
                nBins = self.nBins
            else:
                nBins = int(options['nBins'])
            xMin = float(options['xMin'])
            xMax = float(options['xMax'])
            weightF = options['weight']
            if cutOverWrite:
                treeCut = cutOverWrite
            else:
                treeCut = '%s' % (options['cut'])

            # Booked up front so an empty MC tree still yields a histogram.
            hTree = ROOT.TH1F('%s' % name, '%s' % name, nBins, xMin, xMax)
            hTree.Sumw2()
            target = '%s>>%s' % (treeVar, name)

            if job.type != 'DATA':
                if CuttedTree and CuttedTree.GetEntries():
                    drawoption = '(%s)*(%s & %s)' % (weightF, treeCut, BDT_add_cut)
                    CuttedTree.Draw(target, drawoption, "goff,e")
            elif options['blind']:
                # NOTE(review): these selections use options['cut'] rather
                # than treeCut, so cutOverWrite has no effect here -- confirm
                # that this is intended.
                if treeVar == 'H.mass':
                    CuttedTree.Draw(target, ' (%(var)s <90. || %(var)s > 150.) & %(cut)s' % options, "goff,e")
                else:
                    CuttedTree.Draw(target, '%(var)s < 0. & %(cut)s' % options, "goff,e")
            else:
                CuttedTree.Draw(target, '%s' % treeCut, "goff,e")

            if job.type != 'DATA':
                if ('RTight' in treeVar or 'RMed' in treeVar) and TrainFlag:
                    MC_rescale_factor = 2.
                else:
                    MC_rescale_factor = 1.
                ScaleFactor = self.tc.get_scale(job, self.config, self.lumi)*MC_rescale_factor
                if ScaleFactor != 0:
                    hTree.Scale(ScaleFactor)

            if addOverFlow:
                # Fold under/overflow into the first/last visible bin and
                # combine the errors in quadrature.
                lastBin = hTree.GetNbinsX()
                uFlow = hTree.GetBinContent(0) + hTree.GetBinContent(1)
                oFlow = hTree.GetBinContent(lastBin + 1) + hTree.GetBinContent(lastBin)
                uFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(0), 2) + ROOT.TMath.Power(hTree.GetBinError(1), 2))
                oFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(lastBin), 2) + ROOT.TMath.Power(hTree.GetBinError(lastBin + 1), 2))
                hTree.SetBinContent(1, uFlow)
                hTree.SetBinContent(lastBin, oFlow)
                hTree.SetBinError(1, uFlowErr)
                hTree.SetBinError(lastBin, oFlowErr)
            hTree.SetDirectory(0)

            gDict = {}
            if self._rebin:
                gDict[group] = self.mybinning.rebin(hTree)
                del hTree
            else:
                gDict[group] = hTree
            hTreeList.append(gDict)

        if CuttedTree:
            CuttedTree.IsA().Destructor(CuttedTree)
        del CuttedTree
        print("get_histos_from_tree DONE for  %s" % (job.name,))
        return hTreeList

    @property
    def rebin(self):
        """Current state of the rebinning switch (``self._rebin``)."""
        return self._rebin

    @rebin.setter
    def rebin(self, value):
        """Switch rebinning on or off.

        Enabling makes ``self.rebin_nBins`` the working bin count and
        requires a binning object in ``self.mybinning``; disabling restores
        ``self.norebin_nBins``.  Assigning the state that is already active
        changes nothing.

        Raises:
            Exception: when enabling while ``self.mybinning`` is None.
        """
        enable = bool(value)
        if enable == bool(self._rebin):
            return
        if enable:
            if self.mybinning is None:
                raise Exception('define rebinning first')
            self.nBins = self.rebin_nBins
            self._rebin = True
        else:
            self.nBins = self.norebin_nBins
            self._rebin = False

    def calc_rebin(self, bg_list, nBins_start=1000, tolerance=0.25):
        """Derive a variable binning from the summed backgrounds and enable it.

        Every job in ``bg_list`` is histogrammed with ``nBins_start`` fine
        bins (first option set only) and the results are summed.  Fine bins
        are accumulated from the last bin downwards, and from the first bin
        upwards, until the relative uncertainty (error / content) of the
        accumulated sum is no larger than ``tolerance``.  The fine bins in
        between are split into ``norebin_nBins - 2`` groups of equal size,
        with the remainder added to one of them.  The low edges of the chosen
        bins are passed to ``Rebinner`` and the result is stored in
        ``self.mybinning``.

        Args:
            bg_list: iterable of background jobs accepted by
                ``get_histos_from_tree``.
            nBins_start: number of fine bins used while deriving the binning.
            tolerance: largest accepted relative uncertainty of an outer bin.

        Raises:
            ValueError: if ``bg_list`` yields no job.
            RuntimeError: if a scan uses up every fine bin without reaching
                ``tolerance``.
        """
        print("START calc_rebin")
        self.calc_rebin_flag = True
        self.norebin_nBins = copy(self.nBins)
        self.rebin_nBins = nBins_start
        self.nBins = nBins_start
        #add all together:
        print('\n\t...calculating rebinning...')
        totalBG = None
        for job in bg_list:
            print("job %s" % (job,))
            # First option set, single {group: histogram} entry.
            htree = list(self.get_histos_from_tree(job)[0].values())[0]
            print("Integral %s %s" % (job, htree.Integral()))
            if totalBG is None:
                totalBG = copy(htree)
            else:
                totalBG.Add(htree, 1)
            del htree
        if totalBG is None:
            raise ValueError('calc_rebin needs at least one background job')

        ErrorR = 0
        ErrorL = 0
        TotR = 0
        TotL = 0
        binR = self.rebin_nBins
        binL = 1
        rel = 1.0
        print("START loop from right")
        print("totalBG.Draw(,) %s" % (totalBG.Integral(),))
        #---- from right
        while rel > tolerance:
            if binR < 1:
                # Every fine bin was used; going on would leave the histogram.
                raise RuntimeError(
                    'calc_rebin: tolerance %s not reached within %s bins'
                    % (tolerance, self.rebin_nBins))
            TotR += totalBG.GetBinContent(binR)
            ErrorR = sqrt(ErrorR**2 + totalBG.GetBinError(binR)**2)
            binR -= 1
            if not TotR == 0 and not ErrorR == 0:
                rel = ErrorR/TotR
                print(rel)
        print("END loop from right")

        #---- from left
        rel = 1.0
        print("START loop from left")
        while rel > tolerance:
            if binL > self.rebin_nBins:
                raise RuntimeError(
                    'calc_rebin: tolerance %s not reached within %s bins'
                    % (tolerance, self.rebin_nBins))
            TotL += totalBG.GetBinContent(binL)
            ErrorL = sqrt(ErrorL**2 + totalBG.GetBinError(binL)**2)
            binL += 1
            if not TotL == 0 and not ErrorL == 0:
                rel = ErrorL/TotL
        #it's the lower edge
        print("STOP loop from left")
        binL += 1

        inbetween = binR - binL
        n_inner = int(self.norebin_nBins) - 2
        # Floor division spelled out so the result stays an integer bin index.
        stepsize = int(inbetween)//n_inner
        modulo = int(inbetween) % n_inner

        binlist = [binL]
        print("jjj")
        for _ in range(n_inner - 1):
            binlist.append(binlist[-1] + stepsize)
        binlist[-1] += modulo
        binlist.append(binR)
        binlist.append(self.rebin_nBins + 1)
        print("START REBINNER")
        # NOTE(review): the leading -1.0 edge presumably matches the lower
        # bound of the plotted variable -- confirm against Rebinner's users.
        edges = [-1.0] + [totalBG.GetBinLowEdge(b) for b in binlist]
        self.mybinning = Rebinner(int(self.norebin_nBins), array('d', edges), True)
        self._rebin = True
        print("STOP calc_rebin")
        print('\t > rebinning is set <\n')

    @staticmethod
    def orderandadd(histo_dicts, setup, jobnames):
        """Sum histograms that share a sample name, logging each contribution.

        Args:
            histo_dicts: sequence of ``{sample: histogram}`` dicts.
            setup: iterable of sample names to collect.
            jobnames: sequence parallel to ``histo_dicts`` giving the job
                name reported for each dict.

        Returns:
            A dict mapping each name in ``setup`` that occurs in at least one
            input dict to a new histogram: a clone of the first occurrence
            with every later occurrence added once.
        """
        ordered_histo_dict = {}
        print("orderandadd-setup %s" % (setup,))
        print("orderandadd-histo_dicts %s" % (histo_dicts,))
        for sample in setup:
            nSample = 0
            # enumerate gives the true position; list.index would return the
            # first *equal* dict and costs a linear search per lookup.
            for position, histo_dict in enumerate(histo_dicts):
                if sample not in histo_dict:
                    continue
                histo = histo_dict[sample]
                integral = histo.Integral()
                entries = histo.GetEntries()
                if nSample == 0:
                    ordered_histo_dict[sample] = histo.Clone()
                else:
                    # Only later occurrences are added; adding the first one
                    # again after cloning it would count it twice.
                    ordered_histo_dict[sample].Add(histo)
                printc('magenta','','\t--> added %s to %s. Integral: %s. Entries: %s'%(jobnames[position],sample,integral,entries))
                nSample += 1
        print("orderandadd-ordered_histo_dict %s" % (ordered_histo_dict,))
        return ordered_histo_dict
示例#18
0
class HistoMaker:
    def __init__(self, samples, path, config, optionsList, GroupDict=None):
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi = 0.
        self.cuts = []
        self.weight = []
        self.sys_cuts = []

        for options in optionsList:
            self.cuts.append(options['cut'])
            self.weight.append(options['weight'])
            #self.sys_cuts.append(options['sys_cut'])

        print '  with Cuts  : ', self.cuts[0]
        print '  and Weights: ', self.weight[0]

        self.tc = TreeCache(self.cuts, samples, path, config)

        self._rebin = False
        self.mybinning = None
        self.GroupDict = GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(VHbbNameSpace)

    def get_histos_from_tree(self, job, cutOverWrite=None):

        #if self.lumi == 0:
        #    raise Exception("You're trying to plot with no lumi")

        hTreeList = []

        #get the conversion rate in case of BDT plots
        TrainFlag = eval(self.config.get('Analysis', 'TrainFlag'))
        if TrainFlag:
            BDT_test_cut = 'evt%2!=0'
            #print '\n======== Filling Datacard with BDT Test Events =========='
        else:
            BDT_test_cut = 'evt%2==0'
            #print '\n======== Filling Datacard with BDT Train Events =========='

        plot_path = self.config.get('Directories', 'plotpath')
        addOverFlow = eval(self.config.get('Plot_general', 'addOverFlow'))

        # Get the tree for this sample(not actually cut yet)
        CuttedTree = self.tc.get_tree(job, '1')

        # Get the lumiweighted cross section and genWeight from the tree
        if job.type != 'DATA':
            xSec = self.tc.get_scale(job, self.config)
            '''
            # FOr temp DY special weights
            if 'Vpt100to250' in job.name:
                 xSec = xSec*(1.0-0.66)
            if 'Vpt250to400' in job.name:
                xSec = xSec*(1.0-0.85)
            if 'Vpt400to650' in job.name:
                xSec =xSec*(1.0-0.98)
            if 'Vpt650toInf' in job.name:
                xSec =xSec*(1.0-1.0)
            '''

        if job.type == 'DATA':
            xSec = 1

        #print '-----> Job Name, Type: ', job.name, job.type
        #print '       xSec: ', xSec
        #print '       Tree: ', CuttedTree
        #print '    Entries: ', CuttedTree.GetEntries()
        #print '    treeCut: ', options['cut']

        eval(self.config.get('Analysis', 'TrainFlag'))
        for options in self.optionsList:

            name = job.name

            if self.GroupDict is None:
                group = job.group
            else:
                group = self.GroupDict[job.name]

            treeVar = options['var']
            name = options['name']
            #print 'Options:', options
            #print 'Type: ', job.type
            #print 'Name: ', name

            if self._rebin or self.calc_rebin_flag:
                nBins = self.nBins
            else:
                nBins = int(options['nBins'])

            xMin = float(options['xMin'])
            xMax = float(options['xMax'])

            if cutOverWrite:
                treeCut = cutOverWrite
            else:
                treeCut = '%s' % (options['cut'])

            # Add the JEC/JER sys cuts by hand
            if 'gg_plus_' in treeVar or 'VV' in treeVar:
                if '_up' in treeVar or '_down' in treeVar:
                    if options['sys_cut']:
                        treeCut = '%s' % (options['sys_cut'])
                        print treeVar
                        print '\n\t!!!! JER/JEC Tree SYS Cut:', treeCut

            if 'minCMVA' in treeVar:
                if options['sys_cut']:
                    treeCut = '%s' % (options['sys_cut'])
                    print treeVar
                    print '\n\t!!!! JER/JEC Tree SYS Cut:', treeCut

            weightF = '%s' % (options['weight'])

            ##############################
            # Add any special weights here
            if 'Zudsg' in job.name or 'Zcc' in job.name or 'Z1b' in job.name or 'Z2b' in job.name:
                weightF = weightF + '*VHbb::LOtoNLOWeightBjetSplitEtabb(abs(Jet_eta[hJCidx[0]]-Jet_eta[hJCidx[1]]),Sum$(GenJet_pt>20 && abs(GenJet_eta)<2.4 && GenJet_numBHadrons))'
                weightF = weightF + '*VHbb::ptWeightEWK_Zll(nGenVbosons[0], GenVbosons_pt[0], VtypeSim, nGenTop, nGenHiggsBoson)'
                weightF = weightF + '*(' + job.specialweight + ')'
            if '2L2Q' in job.name:
                print '\n\t----> Adding ZZ_2L2Q special weights...'
                weightF = weightF + '*(' + job.specialweight + ')'
            #if 'ttbar' in job.name:
            #    weightF = weightF+'*VHbb::ttbar_reweight(GenTop_pt[0],GenTop_pt[1],nGenTop)'
            #if 'ZH' in job.name and not 'ggZH' in job.name:
            #    weightF = weightF+'*VHbb::ptWeightEWK_Zll_v25(nGenVbosons[0], GenVbosons_pt[0], VtypeSim)'

            # For high/low SF
            if str(self.config.get('Plot_general', 'doSF')) == 'True':

                print '\n\t !!! Adding RateParam !!!'

                if 'V_new_pt > 50' in treeCut:
                    if 'Zudsg' in job.name or 'Zcc' in job.name:
                        weightF = weightF + '*(0.921)'
                    if 'Z1b' in job.name: weightF = weightF + '*(0.86)'
                    if 'Z2b' in job.name: weightF = weightF + '*(1.0)'
                    if 'ttbar' in job.name: weightF = weightF + '*(1.0037)'

                if 'V_new_pt > 150' in treeCut:
                    if 'Zudsg' in job.name or 'Zcc' in job.name:
                        weightF = weightF + '*(1.0037)'
                    if 'Z1b' in job.name: weightF = weightF + '*(0.985)'
                    if 'Z2b' in job.name: weightF = weightF + '*(1.27)'
                    if 'ttbar' in job.name: weightF = weightF + '*(0.959)'

            print '\n-----> Making histograms for variable:', treeVar
            print 'Job.name:', job.name
            print 'weightF:', weightF
            print 'nBins:', nBins, xMin, xMax

            if job.type != 'DATA':

                if CuttedTree.GetEntries():

                    #if 'trainBDT' in treevar:
                    #    drawoption = '(%s)*(%s & %s)'%(weightF,treeCut,BDT_train_cut)

                    if 'gg_plus' in treeVar or 'VV' in treeVar:
                        print '\n----> Filling Histogram with BDT Test Events Only...'
                        drawoption = '(sign(genWeight))*(%s)*(%s & %s)' % (
                            weightF, treeCut, BDT_test_cut)

                    elif 'vtx' in treeVar or 'EmEF' in treeVar or '_lepton' in treeVar:
                        drawoption = '(sign(genWeight))*(%s)*(%s)' % (
                            weightF, treeCut + ' && ' + treeVar + ' > 0')
                        '''    
                        elif 'minCSV' in treeVar:
                        if 'Zudsg' in job.name or 'Zcc' in job.name: 
                        xMin = 0.0
                            xMax = 0.55
                        if 'Z1b' in job.name or 'Z2b' in job.name or 'ttbar' in job.name:
                            xMin = 0.4
                            xMax = 1.0
                            drawoption = '(sign(genWeight))*(%s)*(%s)' % (weightF,treeCut)
                            '''
                    else:
                        #drawoption = '(sign(genWeight))*(%s)*(%s)*(%s)' % (weightF,treeCut,xSec)
                        drawoption = '(sign(genWeight))*(%s)*(%s)' % (weightF,
                                                                      treeCut)

                    print 'Draw Option:', drawoption

                    CuttedTree.Draw(
                        '%s>>%s(%s,%s,%s)' %
                        (treeVar, job.name, nBins, xMin, xMax), drawoption,
                        "goff,e")

                    full = True

                else:
                    full = False

            elif job.type == 'DATA':

                print '\n----> Job Type: Data...'
                print '            Name: ', job.name

                print '\n---->Drawing Tree for variable: ', treeVar
                print '  with Cuts: ', treeCut

                if 'gg_plus' in treeVar or 'VV' in treeVar:
                    if options['blind']:
                        #treeCut = treeCut + ' & '+treeVar+'<0.3'

                        print '\n\n====== BLINDED ======'
                        print treeCut

                        CuttedTree.Draw(
                            '%s>>%s(%s,%s,%s)' %
                            (treeVar, job.name, nBins, xMin, xMax),
                            '%s' % treeCut, "goff,e")
                    else:
                        CuttedTree.Draw(
                            '%s>>%s(%s,%s,%s)' %
                            (treeVar, job.name, nBins, xMin, xMax),
                            '%s' % treeCut, "goff,e")

                elif '_corr' in treeVar:
                    print 'VAR:', treeVar
                    if '[0]' in treeVar:
                        new_treeVar = 'Jet_pt[hJCidx[0]]/Jet_rawPt[hJCidx[0]]'
                    if '[1]' in treeVar:
                        new_treeVar = 'Jet_pt[hJCidx[1]]/Jet_rawPt[hJCidx[1]]'
                    CuttedTree.Draw(
                        '%s>>%s(%s,%s,%s)' %
                        (new_treeVar, job.name, nBins, xMin, xMax),
                        '%s' % treeCut, "goff,e")

                elif 'vtx' in treeVar or 'EmEF' in treeVar or '_lepton' in treeVar:
                    CuttedTree.Draw(
                        '%s>>%s(%s,%s,%s)' %
                        (treeVar, job.name, nBins, xMin, xMax),
                        '%s' % treeCut + ' && ' + treeVar + ' > 0', "goff,e")

                #else:
                #    if options['blind']:
                #        print '!!!!BLINDING!!!'
                #        CuttedTree.Draw('%s>>%s(%s,%s,%s)' %(treeVar,job.name,nBins,xMin,xMax), '%s & V_pt < 50.' %treeCut, "goff,e")

                elif 'LHE' in treeVar or 'HT' in treeVar or 'lhe' in treeVar:
                    CuttedTree.Draw(
                        '%s>>%s(%s,%s,%s)' %
                        ('Jet_pt[hJCidx[1]]', job.name, nBins, xMin, xMax),
                        '%s' % treeCut + ' && Jet_pt[hJCidx[1]] < 0.0',
                        "goff,e")

                else:
                    print '!!!!NOT BLINDING!!!'
                    CuttedTree.Draw(
                        '%s>>%s(%s,%s,%s)' %
                        (treeVar, job.name, nBins, xMin, xMax), '%s' % treeCut,
                        "goff,e")

                print CuttedTree
                '''
                hReg_metric = hReg.GetRMS()/hReg.GetMean()
                hNom_metric = hNom.GetRMS()/hNom.GetMean()
                percent_improvement = (1-(hReg_metric/hNom_metric))*100
                hReg_std = str(round(hReg.GetRMS(),3))
                hReg_mu  = str(round(hReg.GetMean(),3))
                hNom_std = str(round(hNom.GetRMS(),3))
                hNom_mu  = str(round(hNom.GetMean(),3))
                '''

                full = True

            if full:
                hTree = ROOT.gDirectory.Get(job.name)
                print '\nJob name: ', job.name
                print 'hTree: ', hTree

                # Get Stats
                #hTree.GetRMS()
                #hTree.GetMean()
                #print '\t Mean: ', hTree.GetMean()
                #print '\t RMS : ', hTree.GetRMS()

            else:
                hTree = ROOT.TH1F('%s' % name, '%s' % name, nBins, xMin, xMax)
                hTree.Sumw2()

            # NOW scale the histograms
            if job.type != 'DATA':

                if 'gg_plus' in treeVar or 'VV' in treeVar:
                    if TrainFlag:
                        MC_rescale_factor = 2.
                        print 'I RESCALE BY 2.0'
                    else:
                        MC_rescale_factor = 1.

                    ScaleFactor = self.tc.get_scale(
                        job, self.config) * MC_rescale_factor

                    # for LHE scale shapes we need a different norm
                    if 'LHE_weights_scale_wgt[0]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHEscale(
                            job, self.config, 0) * MC_rescale_factor
                    elif 'LHE_weights_scale_wgt[1]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHEscale(
                            job, self.config, 1) * MC_rescale_factor
                    elif 'LHE_weights_scale_wgt[2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHEscale(
                            job, self.config, 2) * MC_rescale_factor
                    elif 'LHE_weights_scale_wgt[3]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHEscale(
                            job, self.config, 3) * MC_rescale_factor

                    print '\n-----> Histogram Scale Factor: ', ScaleFactor
                else:
                    ScaleFactor = self.tc.get_scale(job, self.config)

                    # for LHE scale shapes we need a different norm
                    if 'LHE_weights_scale_wgt[0]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHEscale(
                            job, self.config, 0)
                    elif 'LHE_weights_scale_wgt[1]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHEscale(
                            job, self.config, 1)
                    elif 'LHE_weights_scale_wgt[2]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHEscale(
                            job, self.config, 2)
                    elif 'LHE_weights_scale_wgt[3]' in weightF:
                        ScaleFactor = self.tc.get_scale_LHEscale(
                            job, self.config, 3)

                    print '\n-----> Histogram Scale Factor: ', ScaleFactor

                if ScaleFactor != 0:
                    hTree.Scale(ScaleFactor)

            #print '\t-->import %s\t Integral: %s'%(job.name,hTree.Integral())
            if addOverFlow:
                uFlow = hTree.GetBinContent(0) + hTree.GetBinContent(1)
                oFlow = hTree.GetBinContent(hTree.GetNbinsX() +
                                            1) + hTree.GetBinContent(
                                                hTree.GetNbinsX())
                uFlowErr = ROOT.TMath.Sqrt(
                    ROOT.TMath.Power(hTree.GetBinError(0), 2) +
                    ROOT.TMath.Power(hTree.GetBinError(1), 2))
                oFlowErr = ROOT.TMath.Sqrt(
                    ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()), 2) +
                    ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX() +
                                                       1), 2))
                hTree.SetBinContent(1, uFlow)
                hTree.SetBinContent(hTree.GetNbinsX(), oFlow)
                hTree.SetBinError(1, uFlowErr)
                hTree.SetBinError(hTree.GetNbinsX(), oFlowErr)

            hTree.SetDirectory(0)
            gDict = {}

            if self._rebin:
                gDict[group] = self.mybinning.rebin(hTree)
                del hTree
            else:
                #print 'not rebinning %s'%job.name
                gDict[group] = hTree

            hTreeList.append(gDict)

        CuttedTree.IsA().Destructor(CuttedTree)
        del CuttedTree
        return hTreeList

    @property
    def rebin(self):
        """Return True when histograms are produced with the custom rebinning."""
        return self._rebin

    # The second definition used to be decorated with @property as well, which
    # replaced the getter by a two-argument function: every read of
    # ``obj.rebin`` raised TypeError and no setter existed at all.
    # NOTE(review): under Python 2 a property setter is only honoured on
    # new-style classes -- confirm the enclosing class derives from ``object``.
    @rebin.setter
    def rebin(self, value):
        """Switch the custom rebinning on or off.

        Turning it on restores ``self.rebin_nBins`` as the working number of
        bins, turning it off restores ``self.norebin_nBins``. Asking for the
        state that is already active changes nothing.

        Raises:
            Exception: when rebinning is requested while ``self.mybinning``
                is still None (no binning has been computed yet).
        """
        if bool(value) == bool(self._rebin):
            # Already in the requested state (the original tested the
            # non-existent ``self.value`` here and raised AttributeError).
            return
        if value:
            if self.mybinning is None:
                raise Exception('define rebinning first')
            self.nBins = self.rebin_nBins
            self._rebin = True
        else:
            self.nBins = self.norebin_nBins
            self._rebin = False

    def calc_rebin(self, bg_list, nBins_start=1000, tolerance=0.35):
        self.calc_rebin_flag = True
        self.norebin_nBins = copy(self.nBins)
        self.rebin_nBins = nBins_start
        self.nBins = nBins_start
        i = 0

        #add all together:
        print '\n\t...calculating rebinning on list:', bg_list

        for job in bg_list:

            #print 'Rebinner BKG_sample:', job.name

            htree = self.get_histos_from_tree(job)[0].values()[0]
            #print 'Rebinner htree:', htree

            if not i:
                totalBG = copy(htree)
            else:
                totalBG.Add(htree, 1)
            del htree
            i += 1

        ErrorR = 0
        ErrorL = 0
        TotR = 0
        TotL = 0
        binR = self.rebin_nBins
        binL = 1
        rel = 1.0

        #---- from right
        while rel > tolerance:
            TotR += totalBG.GetBinContent(binR)
            ErrorR = sqrt(ErrorR**2 + totalBG.GetBinError(binR)**2)
            binR -= 1
            if binR < 0: break
            if TotR < 1.: continue
            if not TotR <= 0 and not ErrorR == 0:
                rel = ErrorR / TotR
                #print rel
        #print 'upper bin is %s'%binR

        #---- from left
        rel = 1.0
        while rel > tolerance:
            TotL += totalBG.GetBinContent(binL)
            ErrorL = sqrt(ErrorL**2 + totalBG.GetBinError(binL)**2)
            binL += 1
            if binL > nBins_start: break
            if TotL < 1.: continue
            if not TotL <= 0 and not ErrorL == 0:
                rel = ErrorL / TotL
                #print rel
        #it's the lower edge
        binL += 1
        #print 'lower bin is %s'%binL

        inbetween = binR - binL
        stepsize = int(inbetween) / (int(self.norebin_nBins) - 2)
        modulo = int(inbetween) % (int(self.norebin_nBins) - 2)

        #print 'stepsize %s'% stepsize
        #print 'modulo %s'%modulo
        binlist = [binL]
        for i in range(0, int(self.norebin_nBins) - 3):
            binlist.append(binlist[-1] + stepsize)
        binlist[-1] += modulo
        binlist.append(binR)
        binlist.append(self.rebin_nBins + 1)
        # print 'binning set to %s'%binlist
        self.mybinning = Rebinner(
            int(self.norebin_nBins),
            array('d', [-1.0] + [totalBG.GetBinLowEdge(i) for i in binlist]),
            True)
        self._rebin = True
        print '\t > rebinning is set <\n'

    @staticmethod
    def orderandadd(histo_dicts, setup):
        """Sum, per sample name, the histograms found in ``histo_dicts``.

        Args:
            histo_dicts: sequence of dicts mapping a sample/group name to a
                histogram object providing ``Clone()`` and ``Add()``.
            setup: iterable of the sample names to collect.

        Returns:
            dict mapping every name of ``setup`` that occurs in at least one
            input dict to a clone of its first histogram with all further
            ones added. The input histograms are not modified.
        """
        ordered_histo_dict = {}
        for sample in setup:
            nSample = 0
            for histo_dict in histo_dicts:
                # ``in`` instead of dict.has_key(), which Python 3 removed.
                if sample not in histo_dict:
                    continue
                if nSample == 0:
                    ordered_histo_dict[sample] = histo_dict[sample].Clone()
                else:
                    ordered_histo_dict[sample].Add(histo_dict[sample])
                nSample += 1
        return ordered_histo_dict
示例#19
0
class HistoMaker:
    def __init__(self, samples, path, config, optionsList,GroupDict=None):
        #samples: list of the samples, data and mc
	#path: location of the samples used to perform the plot
	#config: list of the configuration files
	#optionsList: Dictionnary containing information on vars, including the cuts
	#! Read arguments and initialise variables
	print "The options are ", optionsList
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi=0.
        self.cuts = []
        for options in optionsList:
            self.cuts.append(options['cut'])
        self.tc = TreeCache(self.cuts,samples,path,config)# created cached tree i.e. create new skimmed trees using the list of cuts
        self._rebin = False
        self.mybinning = None
        self.GroupDict=GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace=config.get('VHbbNameSpace','library')
        ROOT.gSystem.Load(VHbbNameSpace)

    def get_histos_from_tree(self,job,cutOverWrite=None):
        
        print "Histomaker debug1"
        '''Function that produce the trees from a HistoMaker'''
        if self.lumi == 0: 
            raise Exception("You're trying to plot with no lumi")
         
        hTreeList=[]

        print "Histomaker debug2"
        #get the conversion rate in case of BDT plots
        TrainFlag = eval(self.config.get('Analysis','TrainFlag'))
        BDT_add_cut='EventForTraining == 0'

        plot_path = self.config.get('Directories','plotpath')
        addOverFlow=eval(self.config.get('Plot_general','addOverFlow'))

        print "Histomaker debug3"
        # get all Histos at once
        CuttedTree = self.tc.get_tree(job,'1')# retrieve the cuted tree
        # print 'CuttedTree.GetEntries()',CuttedTree.GetEntries()
        # print 'begin self.optionsList',self.optionsList
        # print 'end self.optionsList'

        print "Histomaker debug4"
	#! start the loop over variables (descriebed in options) 
        for options in self.optionsList:
            name=job.name
            if self.GroupDict is None:
                group=job.group
            else:
                group=self.GroupDict[job.name]
            treeVar=options['var']
            name=options['name']
            # print 'options[\'name\']',options['name']
            if self._rebin or self.calc_rebin_flag:
                nBins = self.nBins
            else:
                nBins = int(options['nBins'])
            xMin=float(options['xMin'])
            xMax=float(options['xMax'])
            weightF=options['weight']
            if cutOverWrite:
                treeCut=cutOverWrite
            else:
                treeCut='%s'%(options['cut'])

            #options
            # print 'treeCut',treeCut
            # print 'weightF',weightF
            
            hTree = ROOT.TH1F('%s'%name,'%s'%name,nBins,xMin,xMax)
            hTree.Sumw2()
            print "Histomaker debug5"
            print('hTree.name() 1 =',hTree.GetName())
            drawoption = ''
            if job.type != 'DATA':
                if CuttedTree and CuttedTree.GetEntries():
                    if 'RTight' in treeVar or 'RMed' in treeVar: 
                        drawoption = '(%s)*(%s & %s)'%(weightF,treeCut,BDT_add_cut)
                        #print drawoption
                    else: 
                        drawoption = '(%s)*(%s)'%(weightF,treeCut)
                    CuttedTree.Draw('%s>>%s' %(treeVar,name), drawoption, "goff,e")
                    print ('Draw: %s>>%s' %(treeVar,name), drawoption, "goff,e")
                    print name
                    print('hTree.name() 2 =',hTree.GetName())
                    full=True
                else:
                    full=False
            elif job.type == 'DATA':
                if options['blind']:
                    if treeVar == 'H.mass':
                        CuttedTree.Draw('%s>>%s' %(treeVar,name),' (%(var)s <90. || %(var)s > 150.) & %(cut)s' %options, "goff,e")
                    else:
                        CuttedTree.Draw('%s>>%s' %(treeVar,name),'%(var)s < 0. & %(cut)s'%options, "goff,e")

                else:
                    CuttedTree.Draw('%s>>%s' %(treeVar,name),'%s' %treeCut, "goff,e")
                full = True
            # if full:
                # hTree = ROOT.gDirectory.Get(name)
                # print('histo1',ROOT.gDirectory.Get(name))
            # else:
                # hTree = ROOT.TH1F('%s'%name,'%s'%name,nBins,xMin,xMax)
                # hTree.Sumw2()
                # print('histo2',ROOT.gDirectory.Get(name))
            print('%s>>%s' %(treeVar,name), drawoption, "goff,e")
            print 'name',hTree
            # if full: print 'hTree',hTree.GetName()
              
            if job.type != 'DATA':
                if 'RTight' in treeVar or 'RMed' in treeVar:
                    if TrainFlag:
                        MC_rescale_factor=2.
                        #print 'I RESCALE BY 2.0'
                    else: 
                        MC_rescale_factor = 1.
                    ScaleFactor = self.tc.get_scale(job,self.config,self.lumi)*MC_rescale_factor
                else: 
                    ScaleFactor = self.tc.get_scale(job,self.config,self.lumi)
                if ScaleFactor != 0:
                    hTree.Scale(ScaleFactor)
            #print '\t-->import %s\t Integral: %s'%(job.name,hTree.Integral())
            if addOverFlow:
            	uFlow = hTree.GetBinContent(0)+hTree.GetBinContent(1)
            	oFlow = hTree.GetBinContent(hTree.GetNbinsX()+1)+hTree.GetBinContent(hTree.GetNbinsX())
            	uFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(0),2)+ROOT.TMath.Power(hTree.GetBinError(1),2))
            	oFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()),2)+ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()+1),2))
            	hTree.SetBinContent(1,uFlow)
            	hTree.SetBinContent(hTree.GetNbinsX(),oFlow)
            	hTree.SetBinError(1,uFlowErr)
            	hTree.SetBinError(hTree.GetNbinsX(),oFlowErr)
            hTree.SetDirectory(0)
            gDict = {}
            if self._rebin:
                gDict[group] = self.mybinning.rebin(hTree)
                del hTree
            else: 
                #print 'not rebinning %s'%job.name 
                gDict[group] = hTree
            hTreeList.append(gDict)
        if CuttedTree: CuttedTree.IsA().Destructor(CuttedTree)
        del CuttedTree
        return hTreeList
       
    @property
    def rebin(self):
        """Return True when histograms are produced with the custom rebinning."""
        return self._rebin

    # The second definition used to be decorated with @property as well, which
    # replaced the getter by a two-argument function: every read of
    # ``obj.rebin`` raised TypeError and no setter existed at all.
    # NOTE(review): HistoMaker is declared as an old-style class; under
    # Python 2 a property setter is only honoured on new-style classes
    # (deriving from ``object``).
    @rebin.setter
    def rebin(self, value):
        """Switch the custom rebinning on or off.

        Turning it on restores ``self.rebin_nBins`` as the working number of
        bins, turning it off restores ``self.norebin_nBins``. Asking for the
        state that is already active changes nothing.

        Raises:
            Exception: when rebinning is requested while ``self.mybinning``
                is still None (no binning has been computed yet).
        """
        if bool(value) == bool(self._rebin):
            # Already in the requested state (the original tested the
            # non-existent ``self.value`` here and raised AttributeError).
            return
        if value:
            if self.mybinning is None:
                raise Exception('define rebinning first')
            self.nBins = self.rebin_nBins
            self._rebin = True
        else:
            self.nBins = self.norebin_nBins
            self._rebin = False

    def calc_rebin(self, bg_list, nBins_start=1000, tolerance=0.25):
        self.calc_rebin_flag = True
        self.norebin_nBins = copy(self.nBins)
        self.rebin_nBins = nBins_start
        self.nBins = nBins_start
        i=0
        #add all together:
        print '\n\t...calculating rebinning...'
        for job in bg_list:
            htree = self.get_histos_from_tree(job)[0].values()[0]
            if not i:
                totalBG = copy(htree)
            else:
                totalBG.Add(htree,1)
            del htree
            i+=1
        ErrorR=0
        ErrorL=0
        TotR=0
        TotL=0
        binR=self.rebin_nBins
        binL=1
        rel=1.0
        #---- from right
        while rel > tolerance:
            TotR+=totalBG.GetBinContent(binR)
            ErrorR=sqrt(ErrorR**2+totalBG.GetBinError(binR)**2)
            binR-=1
            if not TotR == 0 and not ErrorR == 0:
                rel=ErrorR/TotR
                #print rel
        #print 'upper bin is %s'%binR

        #---- from left
        rel=1.0
        while rel > tolerance:
            TotL+=totalBG.GetBinContent(binL)
            ErrorL=sqrt(ErrorL**2+totalBG.GetBinError(binL)**2)
            binL+=1
            if not TotL == 0 and not ErrorL == 0:
                rel=ErrorL/TotL
                #print rel
        #it's the lower edge
        binL+=1
        #print 'lower bin is %s'%binL

        inbetween=binR-binL
        stepsize=int(inbetween)/(int(self.norebin_nBins)-2)
        modulo = int(inbetween)%(int(self.norebin_nBins)-2)

        #print 'stepsize %s'% stepsize
        #print 'modulo %s'%modulo
        binlist=[binL]
        for i in range(0,int(self.norebin_nBins)-3):
            binlist.append(binlist[-1]+stepsize)
        binlist[-1]+=modulo
        binlist.append(binR)
        binlist.append(self.rebin_nBins+1)
        #print 'binning set to %s'%binlist
        self.mybinning = Rebinner(int(self.norebin_nBins),array('d',[-1.0]+[totalBG.GetBinLowEdge(i) for i in binlist]),True)
        self._rebin = True
        print '\t > rebinning is set <\n'

    @staticmethod
    def orderandadd(histo_dicts,setup):
        """Sum, per sample name, the histograms found in ``histo_dicts``.

        Args:
            histo_dicts: sequence of dicts mapping a sample/group name to a
                histogram object providing ``Clone()`` and ``Add()``.
            setup: iterable of the sample names to collect.

        Returns:
            dict mapping every name of ``setup`` that occurs in at least one
            input dict to a clone of its first histogram with all further
            ones added; each addition is reported through ``printc``.
        """
        ordered_histo_dict = {}
        for sample in setup:
            nSample = 0
            for histo_dict in histo_dicts:
                # ``in`` instead of dict.has_key(), which Python 3 removed.
                if sample not in histo_dict:
                    continue
                if nSample == 0:
                    ordered_histo_dict[sample] = histo_dict[sample].Clone()
                else:
                    printc('magenta','','\t--> added %s to %s'%(sample,sample))
                    ordered_histo_dict[sample].Add(histo_dict[sample])
                nSample += 1
        return ordered_histo_dict
示例#20
0
class HistoMaker:
    def __init__(self, samples, path, config, optionsList,GroupDict=None,filelist=None,mergeplot=False):
        #samples: list of the samples, data and mc
        #path: location of the samples used to perform the plot
        #config: list of the configuration files
        #optionsList: Dictionnary containing information on vars, including the cuts
        #! Read arguments and initialise variables

        if filelist:
            print 'len(filelist)',len(filelist)
        #print "Start Creating HistoMaker"
        #print "=========================\n"
        self.path = path
        self.config = config
        self.optionsList = optionsList
        self.nBins = optionsList[0]['nBins']
        self.lumi=0.
        self.cuts = []
        for options in optionsList:
            self.cuts.append(options['cut'])
        #print "Cuts:",self.cuts
        self.tc = TreeCache(self.cuts,samples,path,config,filelist,mergeplot)# created cached tree i.e. create new skimmed trees using the list of cuts
        if len(filelist)>0 or mergeplot:
            print('ONLY CACHING PERFORMED, EXITING');
            sys.exit(1)
        #print self.cuts
        # self.tc = TreeCache(self.cuts,samples,path,config)
        self._rebin = False
        self.mybinning = None
        self.GroupDict=GroupDict
        self.calc_rebin_flag = False
        VHbbNameSpace=config.get('VHbbNameSpace','library')
        ROOT.gSystem.Load(VHbbNameSpace)

        print ""
        print "Done Creating HistoMaker"
        print "========================\n"

    def get_histos_from_tree(self,job,quick=True):
        start_time = time.time()

        #print "=============================================================\n"
        #print "THE SAMPLE IS ",job.name
        #print "=============================================================\n"

        '''Function that produce the trees from a HistoMaker'''
         
        #print "Begin to extract the histos from trees (get_histos_from_tree)"
        #print "=============================================================\n"

        if self.lumi == 0: 
            lumi = self.config.get('Plot_general','lumi')
            #print("You're trying to plot with no lumi, I will use ",lumi)
            self.lumi = lumi
         
        hTreeList=[]

        #get the conversion rate in case of BDT plots
        TrainFlag = eval(self.config.get('Analysis','TrainFlag'))

        # #Remove EventForTraining in order to run the MVA directly from the PREP step
        if not 'PSI' in self.config.get('Configuration','whereToLaunch'):
            BDT_add_cut='((evt%2) == 0 || isData)'
        else:
            UseTrainSample = eval(self.config.get('Analysis','UseTrainSample'))
            if UseTrainSample:
                BDT_add_cut='((evt%2) == 0 || isData)'
            else:
                BDT_add_cut='!((evt%2) == 0 || isData)'

        plot_path = self.config.get('Directories','plotpath')
        addOverFlow=eval(self.config.get('Plot_general','addOverFlow'))

        # get all Histos at once
        #print "The tree in the job is ", job.tree
        CuttedTree = self.tc.get_tree(job,'1')# retrieve the cuted tree
        # print 'CuttedTree.GetEntries()',CuttedTree.GetEntries()
#        print 'begin self.optionsList',self.optionsList
        # print 'end self.optionsList'

        #! start the loop over variables (descriebed in options) 
        First_iter = True
        for options in self.optionsList:
            #if First_iter: print 'The name of the job is', job.name
            name=job.name
            if self.GroupDict is None:
                group=job.group
            else:
                group=self.GroupDict[job.name]
            treeVar=options['var']
            #if First_iter: print("START %s"%treeVar)
            name=options['name']
            # print 'options[\'name\']',options['name']
            if self._rebin or self.calc_rebin_flag:
                nBins = self.nBins
            else:
                nBins = int(options['nBins'])
            xMin=float(options['xMin'])
            xMax=float(options['xMax'])
            weightF=options['weight']
            #Include weight per sample (specialweight)
            if 'PSI' in self.config.get('Configuration','whereToLaunch'):
                weightF="("+weightF+")"
            else:
                weightF="("+weightF+")*(" + job.specialweight +")"

            if 'countHisto' in options.keys() and 'countbin' in options.keys():
                count=getattr(self.tc,options['countHisto'])[options['countbin']]
            else:
                count=getattr(self.tc,"CountWeighted")[0]

            #if cutOverWrite:
            #    treeCut= str(1)
            #else:
            #    treeCut='%s'%(options['cut'])
            treeCut='%s'%(options['cut'])

            treeCut = "("+treeCut+")&&"+job.addtreecut 
            #print 'job.addtreecut ',job.addtreecut
            #options
            #print 'treeCut',treeCut
            #print 'weightF',weightF
            
            hTree = ROOT.TH1F('%s'%name,'%s'%name,nBins,xMin,xMax)

            #If you use extension only
            hTree.Sumw2()
            hTree.SetTitle(job.name)
            #print('hTree.name() 1 =',hTree.GetName())
            #print('treeVar 1 =',treeVar)
            drawoption = ''
#            print("START DRAWING")
            if job.type != 'DATA':
              if CuttedTree and CuttedTree.GetEntries():
                if 'BDT' in treeVar or 'bdt' in treeVar or 'OPT' in treeVar:#added OPT for BDT optimisation
                    drawoption = '(%s)*(%s & %s)'%(weightF,BDT_add_cut,treeCut)
                    #if First_iter: print "I'm appling: ",BDT_add_cut
                    print "I'm appling: ",BDT_add_cut
                    # drawoption = 'sign(genWeight)*(%s)*(%s & %s)'%(weightF,treeCut,BDT_add_cut)
                    #print drawoption
                else: 
                    drawoption = '(%s)*(%s)'%(weightF,treeCut)
                #print ('Draw: %s>>%s' %(treeVar,name), drawoption, "goff,e")
                if First_iter: print 'drawoptions are', drawoption
                nevents = CuttedTree.Draw('%s>>%s' %(treeVar,name), drawoption, "goff,e")
                if First_iter: print 'Number of events are', nevents
                #print 'nevents:',hTree.GetEntries(),' hTree.name() 2 =',hTree.GetName()
                full=True
            elif job.type == 'DATA':
                if options['blind']:
                    lowLimitBlindingMass    = 90
                    highLimitBlindingMass   = 140
                    lowLimitBlindingBDT     = 0
                    lowLimitBlindingDR      = 0.8
                    highLimitBlindingDR     = 1.6
                    if 'H' in treeVar and 'mass' in treeVar:
                        lowLimitBlindingMass =hTree.GetBinLowEdge(hTree.FindBin(lowLimitBlindingMass))
                        highLimitBlindingMass =hTree.GetBinLowEdge(hTree.FindBin(highLimitBlindingMass))+ hTree.GetBinWidth(hTree.GetBin(highLimitBlindingMass))
                        veto = ("(%s <%s || %s > %s)" %(treeVar,lowLimitBlindingMass,treeVar,highLimitBlindingMass))
                        #if First_iter: print "Using veto:",veto
                        CuttedTree.Draw('%s>>%s' %(treeVar,name),veto +'&'+' %(cut)s' %options, "goff,e")
                    elif 'BDT' in treeVar or 'bdt' in treeVar or 'nominal' in treeVar in treeVar:
                        lowLimitBlindingBDT = hTree.GetBinLowEdge(hTree.FindBin(lowLimitBlindingBDT))
                        veto = "(%s <%s)" %(treeVar,lowLimitBlindingBDT)
                        #if First_iter: print "Using veto:",veto
                        CuttedTree.Draw('%s>>%s' %(treeVar,name),veto +'&'+' %(cut)s'%options, "goff,e")
                    elif 'dR' in treeVar and 'H' in treeVar:
                        lowLimit   = hTree.GetBinLowEdge(hTree.FindBin(lowLimitBlindingDR))
                        highLimit  = hTree.GetBinLowEdge(hTree.FindBin(highLimitBlindingDR))
                        veto = ("(%s <%s || %s > %s)" %(treeVar,lowLimitBlindingMass,treeVar,highLimitBlindingMass))
                        #if First_iter: print "Using veto:",veto
                        CuttedTree.Draw('%s>>%s' %(treeVar,name),veto +'&'+' %(cut)s'%options, "goff,e")
                    else:
                        CuttedTree.Draw('%s>>%s' %(treeVar,name),'%s' %treeCut, "goff,e")
                else:
                    if First_iter: print 'DATA drawoptions', '%s>>%s' %(treeVar,name),'%s' %treeCut
                    CuttedTree.Draw('%s>>%s' %(treeVar,name),'%s' %treeCut, "goff,e")
                full = True
            # if full:
                # hTree = ROOT.gDirectory.Get(name)
                # print('histo1',ROOT.gDirectory.Get(name))
            # else:
                # hTree = ROOT.TH1F('%s'%name,'%s'%name,nBins,xMin,xMax)
                # hTree.Sumw2()
                # print('histo2',ROOT.gDirectory.Get(name))
#            print("END DRAWING")
#            print("START RESCALE")
            # if full: print 'hTree',hTree.GetName()
              
            if job.type != 'DATA':
                if 'BDT' in treeVar or 'bdt' in treeVar or 'OPT' in treeVar:
                    if TrainFlag:
                        MC_rescale_factor=2. ##FIXME## only dataset used for training must be rescaled!!
                        #print 'I RESCALE BY 2.0'
                    else: 
                        MC_rescale_factor = 1.
                    ScaleFactor = self.tc.get_scale(job,self.config,self.lumi, count)*MC_rescale_factor
                else: 
                    ScaleFactor = self.tc.get_scale(job,self.config,self.lumi, count)
                if ScaleFactor != 0:
                    hTree.Scale(ScaleFactor)
                integral = hTree.Integral()
                #print '\t-->import %s\t Integral: %s'%(job.name,integral)
                #print("job:",job.name," ScaleFactor=",ScaleFactor)
                #print("END RESCALE")
                #print("START addOverFlow")
                # !! Brute force correction for histograms with negative integral (problems with datacard) !!
                if integral<0:
                    hTree.Scale(-0.001)
                    #print "#"*30
                    #print "#"*30
                    #print "original integral was:",integral
                    #print "now is:", hTree.Integral()
                    #print "#"*30
                    #print "#"*30
            if addOverFlow:
                uFlow = hTree.GetBinContent(0)+hTree.GetBinContent(1)
                oFlow = hTree.GetBinContent(hTree.GetNbinsX()+1)+hTree.GetBinContent(hTree.GetNbinsX())
                uFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(0),2)+ROOT.TMath.Power(hTree.GetBinError(1),2))
                oFlowErr = ROOT.TMath.Sqrt(ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()),2)+ROOT.TMath.Power(hTree.GetBinError(hTree.GetNbinsX()+1),2))
                hTree.SetBinContent(1,uFlow)
                hTree.SetBinContent(hTree.GetNbinsX(),oFlow)
                hTree.SetBinError(1,uFlowErr)
                hTree.SetBinError(hTree.GetNbinsX(),oFlowErr)
            hTree.SetDirectory(0)
#            print("STOP addOverFlow")
#            print("START rebin")
            gDict = {}
            if self._rebin:
                gDict[group] = self.mybinning.rebin(hTree)
                del hTree
            else: 
                #print 'not rebinning %s'%job.name 
                gDict[group] = hTree
#            print("STOP %s"%treeVar)
            hTreeList.append(gDict)
            First_iter = False
        if CuttedTree: CuttedTree.IsA().Destructor(CuttedTree)
        del CuttedTree
        #print "Finished to extract the histos from trees (get_histos_from_tree)"
        #print "================================================================\n"
        #print "get_histos_from_tree DONE for ",job.name," in ", str(time.time() - start_time)," s."
        return hTreeList
       
    @property
    def rebin(self):
        """Return True when histograms are produced with the custom rebinning."""
        return self._rebin

    # The second definition used to be decorated with @property as well, which
    # replaced the getter by a two-argument function: every read of
    # ``obj.rebin`` raised TypeError and no setter existed at all.
    # NOTE(review): HistoMaker is declared as an old-style class; under
    # Python 2 a property setter is only honoured on new-style classes
    # (deriving from ``object``).
    @rebin.setter
    def rebin(self, value):
        """Switch the custom rebinning on or off.

        Turning it on restores ``self.rebin_nBins`` as the working number of
        bins, turning it off restores ``self.norebin_nBins``. Asking for the
        state that is already active changes nothing.

        Raises:
            Exception: when rebinning is requested while ``self.mybinning``
                is still None (no binning has been computed yet).
        """
        if bool(value) == bool(self._rebin):
            # Already in the requested state (the original tested the
            # non-existent ``self.value`` here and raised AttributeError).
            return
        if value:
            if self.mybinning is None:
                raise Exception('define rebinning first')
            self.nBins = self.rebin_nBins
            self._rebin = True
        else:
            self.nBins = self.norebin_nBins
            self._rebin = False

    def calc_rebin(self, bg_list, nBins_start=1000, tolerance=0.25):
        #print "START calc_rebin"
        self.calc_rebin_flag = True
        self.norebin_nBins = copy(self.nBins)
        self.rebin_nBins = nBins_start
        self.nBins = nBins_start
        i=0
        #add all together:
        print '\n\t...calculating rebinning...'
        for job in bg_list:
            #print "job",job
            htree = self.get_histos_from_tree(job)[0].values()[0]
            print "Integral",job,htree.Integral()
            if not i:
                totalBG = copy(htree)
            else:
                totalBG.Add(htree,1)
            del htree
            i+=1
        ErrorR=0
        ErrorL=0
        TotR=0
        TotL=0
        binR=self.rebin_nBins
        binL=1
        rel=1.0
        #print "START loop from right"
        #print "totalBG.Draw("","")",totalBG.Integral()
        #---- from right
        while rel > tolerance:
            TotR+=totalBG.GetBinContent(binR)
            ErrorR=sqrt(ErrorR**2+totalBG.GetBinError(binR)**2)
            binR-=1
            # print 'is this loop infinite ?'
            # print "TotR",TotR
            # print "ErrorR",ErrorR
            # print "rel",rel
            if not TotR == 0 and not ErrorR == 0:
                rel=ErrorR/TotR
                print rel
        #print 'upper bin is %s'%binR
        print "END loop from right"

        #---- from left
        rel=1.0
        print "START loop from left"
        while rel > tolerance:
            TotL+=totalBG.GetBinContent(binL)
            ErrorL=sqrt(ErrorL**2+totalBG.GetBinError(binL)**2)
            binL+=1
            if not TotL == 0 and not ErrorL == 0:
                rel=ErrorL/TotL
                #print rel
        #it's the lower edge
        print "STOP loop from left"
        binL+=1
        #print 'lower bin is %s'%binL

        inbetween=binR-binL
        stepsize=int(inbetween)/(int(self.norebin_nBins)-2)
        modulo = int(inbetween)%(int(self.norebin_nBins)-2)

        #print 'stepsize %s'% stepsize
        #print 'modulo %s'%modulo
        binlist=[binL]
        for i in range(0,int(self.norebin_nBins)-3):
            binlist.append(binlist[-1]+stepsize)
        binlist[-1]+=modulo
        binlist.append(binR)
        binlist.append(self.rebin_nBins+1)
        #print 'binning set to %s'%binlist
        #print "START REBINNER"
        self.mybinning = Rebinner(int(self.norebin_nBins),array('d',[-1.0]+[totalBG.GetBinLowEdge(i) for i in binlist]),True)
        self._rebin = True
        print '\t > rebinning is set <\n'

    @staticmethod
    def orderandadd(histo_dicts,setup):
        '''Sum, per sample name, the histograms found in ``histo_dicts``.

        Setup is defined in the plot conf file; histo_dicts contains an array
        of dictionaries mapping a sample/group name to a histogram.

        Returns:
            dict mapping every name of ``setup`` that occurs in at least one
            input dict to a clone of its first histogram with all further
            ones added. Integral, entries and error of every contribution
            are reported through ``printc``.
        '''

        from array import array
        # Receives the error computed by IntegralAndError.
        doubleVariable = array('d',[0])

        ordered_histo_dict = {}
        for sample in setup:
            nSample = 0
            for histo_dict in histo_dicts:
                # ``in`` instead of dict.has_key(), which Python 3 removed.
                if sample not in histo_dict:
                    continue
                histo = histo_dict[sample]
                integral = histo.IntegralAndError(0,histo.GetNbinsX(),doubleVariable)
                error = doubleVariable[0]
                entries = histo.GetEntries()
                subsamplename = histo.GetTitle()
                if nSample == 0:
                    ordered_histo_dict[sample] = histo.Clone()
                else:
                    ordered_histo_dict[sample].Add(histo)
                printc('magenta','','\t--> added %s to %s Integral: %s Entries: %s Error: %s'%(subsamplename,sample,integral,entries,error))
                nSample += 1
        return ordered_histo_dict
示例#21
0
class RegressionTrainer():
    def __init__(self, config):
        '''
        Read the regression-training settings from config and prepare the
        sample list and the tree cache used for training.

        config: configuration object queried through get(section, option);
            it is also passed on to TreeCache and kept on the instance.
        '''
        # Load the library named by the [VHbbNameSpace] 'library' option.
        ROOT.gSystem.Load(config.get('VHbbNameSpace', 'library'))

        def option(key):
            # Shorthand for a lookup in the [TrainRegression] section.
            return config.get("TrainRegression", key)

        self.__weight = option("weight")
        # 'vars' is a whitespace-separated list of input variable names.
        self.__vars = option("vars").split()
        self.__target = option("target")
        self.__cut = option("cut")
        self.__title = option("name")
        self.__signals = option("signals")
        self.__regOptions = option("options")

        # Samples are described by 'samplesinfo' and read from 'PREPout'.
        prep_dir = config.get('Directories', 'PREPout')
        samples_cfg = config.get('Directories', 'samplesinfo')
        self.__info = ParseInfo(samples_cfg, prep_dir)
        self.__samples = self.__info.get_samples(self.__signals)
        # The cache is built for the single base cut of the training.
        self.__tc = TreeCache([self.__cut], self.__samples, prep_dir, config)

        self.__trainCut = option("trainCut")
        self.__testCut = option("testCut")
        self.__config = config

    def train(self):
        signals = []
        signalsTest = []
        for job in self.__samples:
            print '\tREADING IN %s AS SIG' % job.name
            signals.append(
                self.__tc.get_tree(job,
                                   '%s & %s' % (self.__cut, self.__trainCut)))
            signalsTest.append(
                self.__tc.get_tree(job,
                                   '%s & %s' % (self.__cut, self.__testCut)))

        # Perform regression for the two Higgs jets independently
        for iJet in range(0, 2):

            print '\n========== Performing Regression on Jet', iJet, '==========\n'

            sWeight = 1.
            #fnameOutput='training_Reg_%s_Jet'+str(iJet)+'.root'%(self.__title)
            fnameOutput = 'training_Reg_' + self.__title + '_Jet' + str(
                iJet) + '.root'
            output = ROOT.TFile.Open(
                '/exports/uftrig01a/dcurry/data/bbar/13TeV/heppy/files/regr_out/'
                + fnameOutput, "RECREATE")
            print '\n----- Saving output to ', output

            factory = ROOT.TMVA.Factory(
                'MVA', output,
                '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression'
            )

            #factory.SetSignalWeightExpression( self.__weight )
            #set input trees
            for i, signal in enumerate(signals):
                #print 'signal, sWeight, ROOT.TMVA.Types.kTesting', signal, sWeight, ROOT.TMVA.Types.kTraining
                #print 'tree entries', signal.GetEntries()
                factory.AddRegressionTree(signal, sWeight,
                                          ROOT.TMVA.Types.kTraining)

                factory.AddRegressionTree(signalsTest[i], sWeight,
                                          ROOT.TMVA.Types.kTesting)

                self.__apply = []
                p = re.compile(r'hJet_\w+')
            for var in self.__vars:

                if var == 'rho' or var == 'met_pt':
                    factory.AddVariable(var, 'D')
                    continue

                if iJet == 0:

                    if 'max' in var:
                        var = var.replace(')', '[hJidx[0]])')
                        factory.AddVariable(var, 'D')

                    else:
                        var = var + '[hJidx[0]]'
                        factory.AddVariable(var, 'D')

                else:

                    if 'max' in var:
                        var = var.replace(')', '[hJidx[1]])')
                        factory.AddVariable(var, 'D')

                    else:
                        var = var + '[hJidx[1]]'
                        factory.AddVariable(var, 'D')

                self.__apply.append(p.sub(r'\g<0>[0]', var))
                print(self.__apply)

            factory.AddTarget(self.__target + '%s' % ('[' + str(iJet) + ']'))
            mycut = ROOT.TCut(self.__cut)
            #factory.BookMethod(ROOT.TMVA.Types.kBDT,'BDT_REG_%s'%(self.__title),self.__regOptions)
            factory.BookMethod(ROOT.TMVA.Types.kBDT,
                               'BDT_REG_' + self.__title + '_Jet' + str(iJet),
                               self.__regOptions)
            factory.TrainAllMethods()
            #factory.TestAllMethods()
            #factory.EvaluateAllMethods()
            output.Write()
        '''