Пример #1
0
    def checkDatacardRegions(self):
        """Check every configured datacard region and record failures.

        For each region returned by ``self.config.getDatacardRegions()`` the
        cut name, cut string, region type, signal list and background list
        are looked up and printed.  Every signal and background name has to
        match the ``name`` of a sample yielded by
        ``ParseInfo(config=self.config)``.

        Any exception raised while handling a region (including the
        ``SampleNotFound`` exception raised for an unknown sample) is caught
        and reported through ``self.addError``, so the remaining regions are
        still checked.
        """
        regions = self.config.getDatacardRegions()
        knownSampleNames = [
            sample.name for sample in ParseInfo(config=self.config)
        ]
        for region in regions:
            try:
                cutName = self.config.getDatacardCutName(region)
                cutString = self.config.getCutString(cutName)
                print("datacard region:", region)
                print("  ->", cutName)
                print("    ->", cutString)

                regionType = self.config.getDatacardRegionType(region)
                print("  -> TYPE:", regionType)

                signals = self.config.getDatacardRegionSignals(region)
                backgrounds = self.config.getDatacardRegionBackgrounds(region)
                # ANSI colour escapes highlight the two sample lists.
                print("  -> SIG:\x1b[34m", signals, "\x1b[0m")
                print("  -> BKG:\x1b[35m", backgrounds, "\x1b[0m")

                # Stop at the first requested sample that is not defined.
                for requested in list(signals) + list(backgrounds):
                    if requested in knownSampleNames:
                        continue
                    print("ERROR: not found: sample for datacard:", requested)
                    raise Exception("SampleNotFound")

            except Exception as e:
                self.addError('datacard region', ' '.join([region, repr(e)]))
Пример #2
0
 def checkSamples(self):
     """Verify that every sample used by the configuration is defined.

     Each name from ``self.config.getUsedSamples()`` is printed and compared
     with the ``name`` attributes of the samples yielded by
     ``ParseInfo(config=self.config)``.

     Raises:
         Exception: ``"SampleNotFound"`` at the first used sample name that
             is not among the defined sample names.
     """
     definedNames = [sample.name for sample in ParseInfo(config=self.config)]
     for usedName in self.config.getUsedSamples():
         print(usedName)
         if usedName in definedNames:
             continue
         print("ERROR: not found sample:", usedName)
         raise Exception("SampleNotFound")
Пример #3
0
class RegressionTrainer():
    """Train a TMVA BDT regression configured by the ``TrainRegression`` section."""

    def __init__(self, config):
        """Load the analysis library and read the training configuration.

        Args:
            config: configuration object exposing ``get(section, option)``.
                Options are read from the ``VHbbNameSpace``,
                ``TrainRegression`` and ``Directories`` sections.
        """
        vhbb_name_space = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(vhbb_name_space)

        self.__weight = config.get("TrainRegression", "weight")
        # Whitespace-separated list of input variable expressions.
        self.__vars = config.get("TrainRegression", "vars").split()
        self.__target = config.get("TrainRegression", "target")
        self.__cut = config.get("TrainRegression", "cut")
        self.__title = config.get("TrainRegression", "name")
        self.__signals = config.get("TrainRegression", "signals")
        self.__regOptions = config.get("TrainRegression", "options")
        path = config.get('Directories', 'PREPout')
        samplesinfo = config.get('Directories', 'samplesinfo')
        self.__info = ParseInfo(samplesinfo, path)
        self.__samples = self.__info.get_samples(self.__signals)
        self.__tc = TreeCache([self.__cut], self.__samples, path, config)
        # trainCut / testCut are combined with the base cut in train().
        self.__trainCut = config.get("TrainRegression", "trainCut")
        self.__testCut = config.get("TrainRegression", "testCut")
        self.__config = config

    def train(self):
        """Train, test and evaluate the regression BDT on the signal samples.

        One training tree (base cut ``&`` ``trainCut``) and one testing tree
        (base cut ``&`` ``testCut``) are fetched per sample.  Variables,
        target and method are registered once, after all trees have been
        added, so the training is not re-booked for every additional sample.

        Raises:
            IndexError: if no signal sample is available.
        """
        trainSelection = '%s & %s' % (self.__cut, self.__trainCut)
        testSelection = '%s & %s' % (self.__cut, self.__testCut)
        signals = []
        signalsTest = []
        for job in self.__samples:
            # A single parenthesised argument prints identically under
            # Python 2 and Python 3.
            print('\tREADING IN %s AS SIG' % job.name)
            signals.append(self.__tc.get_tree(job, trainSelection))
            signalsTest.append(self.__tc.get_tree(job, testSelection))

        if not signals:
            # Fail before any output file is created; the exception type
            # matches what indexing the empty list raised previously.
            raise IndexError('no signal samples available for regression training')

        sWeight = 1.
        fnameOutput = 'training_Reg_' + self.__title + '.root'
        # NOTE(review): the output directory is a hard-coded, site-specific path.
        output = ROOT.TFile.Open('/exports/uftrig01a/dcurry/data/bbar/13TeV/heppy/files/regr_out/' + fnameOutput, "RECREATE")
        print('\n----- Saving output to  %s' % (output,))

        factory = ROOT.TMVA.Factory('MVA', output, '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression')

        # Register every train/test tree pair first ...
        for trainTree, testTree in zip(signals, signalsTest):
            factory.AddRegressionTree(trainTree, sWeight, ROOT.TMVA.Types.kTraining)
            factory.AddRegressionTree(testTree, sWeight, ROOT.TMVA.Types.kTesting)

        # ... then declare the inputs and the target exactly once.
        for var in self.__vars:
            factory.AddVariable(var, 'D')

        factory.AddTarget(self.__target)
        factory.BookMethod(ROOT.TMVA.Types.kBDT, 'BDT_REG_%s' % (self.__title), self.__regOptions)
        factory.TrainAllMethods()
        factory.TestAllMethods()
        factory.EvaluateAllMethods()
        output.Write()
        
        '''
Пример #4
0
 def __init__(self, config):
     """Load the analysis library and cache the regression-training settings.

     Args:
         config: configuration object exposing ``get(section, option)``; the
             ``VHbbNameSpace``, ``TrainRegression`` and ``Directories``
             sections are read.
     """
     ROOT.gSystem.Load(config.get('VHbbNameSpace', 'library'))

     def trainOption(option):
         # Shorthand for a lookup in the TrainRegression section.
         return config.get("TrainRegression", option)

     self.__weight = trainOption("weight")
     # Whitespace-separated list of input variable expressions.
     self.__vars = trainOption("vars").split()
     self.__target = trainOption("target")
     self.__cut = trainOption("cut")
     self.__title = trainOption("name")
     self.__signals = trainOption("signals")
     self.__regOptions = trainOption("options")

     prepDir = config.get('Directories', 'PREPout')
     samplesFile = config.get('Directories', 'samplesinfo')
     self.__info = ParseInfo(samplesFile, prepDir)
     self.__samples = self.__info.get_samples(self.__signals)
     self.__tc = TreeCache([self.__cut], self.__samples, prepDir, config)

     self.__trainCut = trainOption("trainCut")
     self.__testCut = trainOption("testCut")
     self.__config = config
Пример #5
0
 def __init__(self, config):
     """Read the regression-training setup from ``config``.

     The library named by ``VHbbNameSpace/library`` is loaded through ROOT,
     the ``TrainRegression`` options are stored on the instance, and the
     selected samples plus a ``TreeCache`` for the base cut are prepared
     from the ``Directories`` settings.

     Args:
         config: configuration object exposing ``get(section, option)``.
     """
     trainSection = "TrainRegression"
     dirSection = 'Directories'

     libraryName = config.get('VHbbNameSpace', 'library')
     ROOT.gSystem.Load(libraryName)

     self.__weight = config.get(trainSection, "weight")
     rawVars = config.get(trainSection, "vars")
     self.__vars = rawVars.split()
     self.__target = config.get(trainSection, "target")
     self.__cut = config.get(trainSection, "cut")
     self.__title = config.get(trainSection, "name")
     self.__signals = config.get(trainSection, "signals")
     self.__regOptions = config.get(trainSection, "options")

     inputDir = config.get(dirSection, 'PREPout')
     sampleDefinitions = config.get(dirSection, 'samplesinfo')
     self.__info = ParseInfo(sampleDefinitions, inputDir)
     self.__samples = self.__info.get_samples(self.__signals)
     self.__tc = TreeCache([self.__cut], self.__samples, inputDir, config)

     self.__trainCut = config.get(trainSection, "trainCut")
     self.__testCut = config.get(trainSection, "testCut")
     self.__config = config
Пример #6
0
        '/mnt/t3nfs01/data01/shome/krgedia/CMSSW_10_1_0/src/Xbb/python/Zvv2017config/paths.ini'
    )  #parent class 'ConfigParser' method
    configFiles = pathconfig.get('Configuration', 'List').split(' ')
    config = BetterConfigParser.BetterConfigParser()
    for configFile in configFiles:
        print(configFile)
        config.read('Zvv2017config/' + configFile)

    #print(config.get('Weights','weightF'))
    #config = XbbConfigReader.read('Zvv2017')

    inputFile = 'root://t3dcachedb03.psi.ch:1094//pnfs/psi.ch/cms/trivcat/store/user/berger_p2/VHbb/VHbbPostNano2017/V5/Zvv/rerun/v4j/eval/ggZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8/tree_aa5e971734ef4e885512748d534e6937ff03dc61feed21b6772ba943_000000_000000_0000_9_a6c5a52b56e5e0c7ad5aec31429c8926bf32cf39adbe087f05cfb323.root'
    path = 'root://t3dcachedb03.psi.ch:1094//pnfs/psi.ch/cms/trivcat/store/user/berger_p2/VHbb/VHbbPostNano2017/V5/Zvv/rerun/v4j/eval/'
    samplefiles = '../samples/VHbbPostNano2017_V5/merged_Zvv2017/'
    samplesinfo = 'Zvv2017config/samples_nosplit.ini'
    info = ParseInfo(samples_path=path, config=config)

    sample = [
        x for x in info
        if x.identifier == 'ggZH_HToBB_ZToNuNu_M125_13TeV_powheg_pythia8'
    ][0]

    # read sample
    sampleTree = SampleTree([inputFile], config=config)

    # initialize module
    w = WeightAsBranch()
    w.customInit({
        'sampleTree': sampleTree,
        'config': config,
        'sample': sample,
Пример #7
0
    pathOUT = config.get('Directories', 'SYSout')
elif opts.mergeeval == 'True':
    pathIN = config.get('Directories', 'MVAin')
    pathOUT = config.get('Directories', 'MVAout')
else:
    pathIN = config.get('Directories', 'PREPin')
    pathOUT = config.get('Directories', 'PREPout')

# Location of the sample definition file, taken from the main configuration.
samplesinfo = config.get('Directories', 'samplesinfo')
# The sample definition file is parsed separately from the main configuration.
sampleconf = BetterConfigParser()
sampleconf.read(samplesinfo)

whereToLaunch = config.get(
    'Configuration', 'whereToLaunch')  # USEFUL IN CASE OF SITE BY SITE OPTIONS
# 'prefix' comes from the [General] section of the sample definition file.
prefix = sampleconf.get('General', 'prefix')
# Sample information built from the definition file and the chosen input path.
info = ParseInfo(samplesinfo, pathIN)
print "samplesinfo:", samplesinfo


def mergetreePSI(pathIN, pathOUT, prefix, newprefix, folderName, Aprefix, Acut,
                 config):
    '''
    List of parameters (descriptions as given by the original author):
    pathIN: path of the input file containing the data
    pathOUT: path of the output files
    prefix: "prefix" variable from "samples_nosplit.cfg"
    newprefix: "newprefix" variable from "samples_nosplit.cfg"
    folderName: sample header (e.g. DYJetsToLL_M-50_TuneZ2Star_8TeV-madgraph-tarball);
        the original notes listed this entry as "file" -- presumably it
        describes this parameter, TODO confirm
    Aprefix: empty string ''
    Acut: the sample cut as defined in "samples_nosplit.cfg"
    config: not described in the original notes
    '''
Пример #8
0
class RegressionTrainer():
    """Train a TMVA BDT regression and export the settings needed to apply it."""

    def __init__(self, config):
        """Load the analysis library and read the training configuration.

        Args:
            config: configuration object exposing ``get(section, option)``.
                Options are read from the ``VHbbNameSpace``,
                ``TrainRegression`` and ``Directories`` sections.  The same
                object is modified and written out by :meth:`train`.
        """
        vhbb_name_space = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(vhbb_name_space)

        self.__weight = config.get("TrainRegression", "weight")
        # Whitespace-separated list of input variable expressions.
        self.__vars = config.get("TrainRegression", "vars").split()
        self.__target = config.get("TrainRegression", "target")
        self.__cut = config.get("TrainRegression", "cut")
        self.__title = config.get("TrainRegression", "name")
        self.__signals = config.get("TrainRegression", "signals")
        self.__regOptions = config.get("TrainRegression", "options")
        path = config.get('Directories', 'PREPout')
        samplesinfo = config.get('Directories', 'samplesinfo')
        self.__info = ParseInfo(samplesinfo, path)
        self.__samples = self.__info.get_samples(self.__signals)
        self.__tc = TreeCache([self.__cut], self.__samples, path, config)
        # trainCut / testCut are combined with the base cut in train().
        self.__trainCut = config.get("TrainRegression", "trainCut")
        self.__testCut = config.get("TrainRegression", "testCut")
        self.__config = config

    def train(self):
        """Train the regression BDT and write the application config.

        Steps:
          1. Fetch one training and one testing tree per signal sample.
          2. Book, train, test and evaluate a BDT; the TMVA output goes to
             ``training_Reg_<name>.root`` in the working directory.
          3. Store ``regWeight``, ``regDict`` and ``regVars`` in the
             ``Regression`` section of the config, remove every other
             section from the config object, write the result to
             ``8TeVconfig/appReg`` and echo that file to stdout.

        Note that step 3 modifies the config object passed to the
        constructor in place.
        """
        trainSelection = '%s & %s' % (self.__cut, self.__trainCut)
        testSelection = '%s & %s' % (self.__cut, self.__testCut)
        signals = []
        signalsTest = []
        for job in self.__samples:
            # A single parenthesised argument prints identically under
            # Python 2 and Python 3.
            print('\tREADING IN %s AS SIG' % job.name)
            signals.append(self.__tc.get_tree(job, trainSelection))
            signalsTest.append(self.__tc.get_tree(job, testSelection))

        sWeight = 1.
        fnameOutput = 'training_Reg_%s.root' % (self.__title)
        output = ROOT.TFile.Open(fnameOutput, "RECREATE")

        factory = ROOT.TMVA.Factory('MVA', output, '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression')
        # The configured weight expression (self.__weight) is read in
        # __init__ but is not passed to the factory.

        # Set the input trees.
        for trainTree, testTree in zip(signals, signalsTest):
            factory.AddRegressionTree(trainTree, sWeight, ROOT.TMVA.Types.kTraining)
            factory.AddRegressionTree(testTree, sWeight, ROOT.TMVA.Types.kTesting)

        # For the application step every ``hJet_<name>`` token gets ``[0]``
        # appended; the training itself uses the unmodified expressions.
        self.__apply = []
        p = re.compile(r'hJet_\w+')
        for var in self.__vars:
            factory.AddVariable(var, 'D')  # add the variables
            self.__apply.append(p.sub(r'\g<0>[0]', var))
            print(self.__apply)

        factory.AddTarget(self.__target)
        factory.BookMethod(ROOT.TMVA.Types.kBDT, 'BDT_REG_%s' % (self.__title), self.__regOptions)  # book an MVA method
        factory.TrainAllMethods()
        factory.TestAllMethods()
        factory.EvaluateAllMethods()
        output.Write()

        # Map each training variable to its application-time expression.
        regDict = dict(zip(self.__vars, self.__apply))
        self.__config.set('Regression', 'regWeight', '../data/MVA_BDT_REG_%s.weights.xml' % self.__title)
        self.__config.set('Regression', 'regDict', '%s' % regDict)
        self.__config.set('Regression', 'regVars', '%s' % self.__vars)
        # Keep only the Regression section in the exported file.
        for section in self.__config.sections():
            if section != 'Regression':
                self.__config.remove_section(section)
        with open('8TeVconfig/appReg', 'w') as configfile:
            self.__config.write(configfile)
        # Echo the written file for inspection.
        with open('8TeVconfig/appReg', 'r') as configfile:
            for line in configfile:
                print(line.strip())
Пример #9
0
        hash = hashlib.sha224(minCut).hexdigest()
        print 'pathOUT_orig', pathOUT_orig
        print hash

    grep_hash = ''
    if hash != '':
        grep_hash = ' |grep ' + hash

    mc_dataset_missing_files = []
    data_dataset_missing_files = []
    dataset_identifiers = []

    if opts.filelist == "" or opts.names == 'nosample':
        # print "info:",info
        info = ParseInfo(samplesinfo, pathOUT_orig)
        # if opts.names == ""
        for job in info:
            dataset_identifiers.append(job.identifier)
        dataset_identifiers = set(dataset_identifiers)
        for identifier in dataset_identifiers:
            sampleType = config.get(identifier, 'sampleType')
            print 'sampleType', sampleType

            # print identifier
            # identifier=opts.names.split(',')[0]
            print "identifier:", identifier
            pathOUT = pathOUT_orig + '/' + identifier
            filenames = open(samplefiles + '/' + identifier +
                             '.txt').readlines()
            print 'number of files on DAS:', len(filenames),  #filenames[0]
Пример #10
0
 def getSamplesInfo(self):
     """Return the sample information, creating it on first use.

     ``self.samplesInfo`` is built from ``ParseInfo(config=self.config)``
     only while it is still ``None``; later calls return the stored object.
     """
     if self.samplesInfo is not None:
         return self.samplesInfo
     self.samplesInfo = ParseInfo(config=self.config)
     return self.samplesInfo
Пример #11
0
class RegressionTrainer():
    """Train a TMVA BDT regression and export the settings needed to apply it."""

    def __init__(self, config):
        """Load the analysis library and read the training configuration.

        Args:
            config: configuration object exposing ``get(section, option)``.
                Options are read from the ``VHbbNameSpace``,
                ``TrainRegression`` and ``Directories`` sections.  The same
                object is modified and written out by :meth:`train`.
        """
        vhbb_name_space = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(vhbb_name_space)

        self.__weight = config.get("TrainRegression", "weight")
        # Whitespace-separated list of input variable expressions.
        self.__vars = config.get("TrainRegression", "vars").split()
        self.__target = config.get("TrainRegression", "target")
        self.__cut = config.get("TrainRegression", "cut")
        self.__title = config.get("TrainRegression", "name")
        self.__signals = config.get("TrainRegression", "signals")
        self.__regOptions = config.get("TrainRegression", "options")
        path = config.get('Directories', 'PREPout')
        samplesinfo = config.get('Directories', 'samplesinfo')
        self.__info = ParseInfo(samplesinfo, path)
        self.__samples = self.__info.get_samples(self.__signals)
        self.__tc = TreeCache([self.__cut], self.__samples, path, config)
        # trainCut / testCut are combined with the base cut in train().
        self.__trainCut = config.get("TrainRegression", "trainCut")
        self.__testCut = config.get("TrainRegression", "testCut")
        self.__config = config

    def train(self):
        """Train the regression BDT and write the application config.

        A training and a testing tree are fetched per signal sample, a BDT
        is booked, trained, tested and evaluated, and the TMVA output is
        written to ``training_Reg_<name>.root`` in the working directory.

        Afterwards ``regWeight``, ``regDict`` and ``regVars`` are stored in
        the ``Regression`` section of the config, every other section is
        removed from the config object (the object passed to the
        constructor is modified in place), and the result is written to
        ``8TeVconfig/appReg`` and echoed to stdout.
        """
        trainSelection = '%s & %s' % (self.__cut, self.__trainCut)
        testSelection = '%s & %s' % (self.__cut, self.__testCut)
        signals = []
        signalsTest = []
        for job in self.__samples:
            # A single parenthesised argument prints identically under
            # Python 2 and Python 3.
            print('\tREADING IN %s AS SIG' % job.name)
            signals.append(self.__tc.get_tree(job, trainSelection))
            signalsTest.append(self.__tc.get_tree(job, testSelection))

        sWeight = 1.
        fnameOutput = 'training_Reg_%s.root' % (self.__title)
        output = ROOT.TFile.Open(fnameOutput, "RECREATE")

        factory = ROOT.TMVA.Factory(
            'MVA', output,
            '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression'
        )
        # The configured weight expression (self.__weight) is read in
        # __init__ but is not passed to the factory.

        # Set the input trees.
        for trainTree, testTree in zip(signals, signalsTest):
            factory.AddRegressionTree(trainTree, sWeight,
                                      ROOT.TMVA.Types.kTraining)
            factory.AddRegressionTree(testTree, sWeight,
                                      ROOT.TMVA.Types.kTesting)

        # For the application step every ``hJet_<name>`` token gets ``[0]``
        # appended; the training itself uses the unmodified expressions.
        self.__apply = []
        p = re.compile(r'hJet_\w+')
        for var in self.__vars:
            factory.AddVariable(var, 'D')  # add the variables
            self.__apply.append(p.sub(r'\g<0>[0]', var))

        factory.AddTarget(self.__target)
        factory.BookMethod(ROOT.TMVA.Types.kBDT, 'BDT_REG_%s' % (self.__title),
                           self.__regOptions)  # book an MVA method
        factory.TrainAllMethods()
        factory.TestAllMethods()
        factory.EvaluateAllMethods()
        output.Write()

        # Map each training variable to its application-time expression.
        regDict = dict(zip(self.__vars, self.__apply))
        self.__config.set('Regression', 'regWeight',
                          '../data/MVA_BDT_REG_%s.weights.xml' % self.__title)
        self.__config.set('Regression', 'regDict', '%s' % regDict)
        self.__config.set('Regression', 'regVars', '%s' % self.__vars)
        # Keep only the Regression section in the exported file.
        for section in self.__config.sections():
            if section != 'Regression':
                self.__config.remove_section(section)
        with open('8TeVconfig/appReg', 'w') as configfile:
            self.__config.write(configfile)
        # Echo the written file for inspection.
        with open('8TeVconfig/appReg', 'r') as configfile:
            for line in configfile:
                print(line.strip())
Пример #12
0
                        self.quickloadWarningShown = True
                        print("INFO: SetQuickLoad(1) called for formula:",
                              formulaName)
                        print(
                            "INFO: -> EvalInstance(0) on formulas will not re-load branches but will take values from memory, which might have been modified by this module."
                        )
                    treeFormula.SetQuickLoad(1)
                #    print("\x1b[31mERROR: this module can't be used together with others which use formulas based on branches changed inside this module!\x1b[0m")
                #    raise Exception("NotImplemented")


if __name__ == '__main__':

    config = XbbConfigReader.read('Wlv2017')

    info = ParseInfo(config=config)

    sample = [
        x for x in info
        if x.identifier == 'WplusH_HToBB_WToLNu_M125_13TeV_powheg_pythia8'
    ][0]
    # read sample
    sampleTree = SampleTree([
        '/store/group/phys_higgs/hbb/ntuples/VHbbPostNano/2017/V11/WplusH_HToBB_WToLNu_M125_13TeV_powheg_pythia8/adewit-crab_nano2017_WplusH_HT81/190606_065851/0000/tree_1.root'
    ],
                            treeName='Events',
                            xrootdRedirector="root://eoscms.cern.ch/")
    # initialize module
    w = JetSmearer("2017")
    w.customInit({
        'sampleTree': sampleTree,
Пример #13
0
class RegressionTrainer():
    """Train one TMVA BDT regression per jet index (0 and 1)."""

    def __init__(self, config):
        """Load the analysis library and read the training configuration.

        Args:
            config: configuration object exposing ``get(section, option)``.
                Options are read from the ``VHbbNameSpace``,
                ``TrainRegression`` and ``Directories`` sections.
        """
        vhbb_name_space = config.get('VHbbNameSpace', 'library')
        ROOT.gSystem.Load(vhbb_name_space)

        self.__weight = config.get("TrainRegression", "weight")
        # Whitespace-separated list of input variable expressions.
        self.__vars = config.get("TrainRegression", "vars").split()
        self.__target = config.get("TrainRegression", "target")
        self.__cut = config.get("TrainRegression", "cut")
        self.__title = config.get("TrainRegression", "name")
        self.__signals = config.get("TrainRegression", "signals")
        self.__regOptions = config.get("TrainRegression", "options")
        path = config.get('Directories', 'PREPout')
        samplesinfo = config.get('Directories', 'samplesinfo')
        self.__info = ParseInfo(samplesinfo, path)
        self.__samples = self.__info.get_samples(self.__signals)
        self.__tc = TreeCache([self.__cut], self.__samples, path, config)
        # trainCut / testCut are combined with the base cut in train().
        self.__trainCut = config.get("TrainRegression", "trainCut")
        self.__testCut = config.get("TrainRegression", "testCut")
        self.__config = config

    def train(self):
        """Train a separate regression BDT for each of the two Higgs jets.

        A training and a testing tree are fetched once per signal sample
        and reused for both jets.  For jet index ``i`` every input variable
        except ``rho`` and ``met_pt`` is indexed with ``[hJidx[i]]``, the
        target is indexed with ``[i]``, and the TMVA output is written to
        ``training_Reg_<name>_Jet<i>.root`` in the hard-coded output
        directory below.  Only training is run; ``TestAllMethods`` and
        ``EvaluateAllMethods`` are not called.
        """
        trainSelection = '%s & %s' % (self.__cut, self.__trainCut)
        testSelection = '%s & %s' % (self.__cut, self.__testCut)
        signals = []
        signalsTest = []
        for job in self.__samples:
            # A single parenthesised argument prints identically under
            # Python 2 and Python 3.
            print('\tREADING IN %s AS SIG' % job.name)
            signals.append(self.__tc.get_tree(job, trainSelection))
            signalsTest.append(self.__tc.get_tree(job, testSelection))

        p = re.compile(r'hJet_\w+')

        # Perform regression for the two Higgs jets independently
        for iJet in range(0, 2):

            print('\n========== Performing Regression on Jet %s ==========\n' % iJet)

            sWeight = 1.
            fnameOutput = 'training_Reg_' + self.__title + '_Jet' + str(
                iJet) + '.root'
            # NOTE(review): the output directory is a hard-coded,
            # site-specific path.
            output = ROOT.TFile.Open(
                '/exports/uftrig01a/dcurry/data/bbar/13TeV/heppy/files/regr_out/'
                + fnameOutput, "RECREATE")
            print('\n----- Saving output to  %s' % (output,))

            factory = ROOT.TMVA.Factory(
                'MVA', output,
                '!V:!Silent:!Color:!DrawProgressBar:Transformations=I:AnalysisType=Regression'
            )
            # The configured weight expression (self.__weight) is read in
            # __init__ but is not passed to the factory.

            # Set the input trees.
            for trainTree, testTree in zip(signals, signalsTest):
                factory.AddRegressionTree(trainTree, sWeight,
                                          ROOT.TMVA.Types.kTraining)
                factory.AddRegressionTree(testTree, sWeight,
                                          ROOT.TMVA.Types.kTesting)

            # Start a fresh list for every jet.  Initialising it here rather
            # than inside the tree loop keeps it defined even when there
            # are no trees.
            self.__apply = []
            jetIndex = '[hJidx[%d]]' % iJet
            for var in self.__vars:

                # These two variables are added without a per-jet index.
                if var == 'rho' or var == 'met_pt':
                    factory.AddVariable(var, 'D')
                    continue

                if 'max' in var:
                    # Put the index inside the call, before each ')'.
                    var = var.replace(')', jetIndex + ')')
                else:
                    var = var + jetIndex
                factory.AddVariable(var, 'D')

                self.__apply.append(p.sub(r'\g<0>[0]', var))
                print(self.__apply)

            factory.AddTarget(self.__target + '[%d]' % iJet)
            factory.BookMethod(ROOT.TMVA.Types.kBDT,
                               'BDT_REG_' + self.__title + '_Jet' + str(iJet),
                               self.__regOptions)
            factory.TrainAllMethods()
            output.Write()
        '''