Exemplo n.º 1
0
    def execute(self):
        """
        Run the analysis.

        Creates the object-level and event-level cut handlers from the
        settings and cut definitions stored on the instance, then starts
        the loop over the input files.
        """
        settings, cuts = self.p_settings, self.p_cuts

        # one handler per cut level, both configured from the same inputs
        self.object_level_cuts = ObjectCuts(settings, cuts)
        self.event_level_cuts = EventCuts(settings, cuts)

        self.FileLoop()
Exemplo n.º 2
0
class Whiteboard(object):
    """
    Base class for doing simple studies and inheriting object building
    and event loops.
    """
    def __init__(self,cfg_filename):
        """
        Read the configuration and cache the static lookup helpers.

        @param cfg_filename     Filename of the config file ("PyUser.ini")
        """
        # configuration handle and the name of the section holding the options
        self.p_settings = info.getConfigSettings(cfg_filename)
        self.p_cfg_name = 'configuration'

        # lookup helpers provided by the config/info modules
        self.ops = config.logicOperators()
        self.key2attr = info.key2attr()
        self.obj2ttree = info.obj2ttree()
        self.truthObj = info.truthObject()

        # variables that aren't in GeV (for scaling plots)
        self.nonGeVvars = info.nonGeVvariables()
        # divisor applied to the values of all other variables in EventLoop
        self.GeV = 1000.0



    def initialize(self):
        """
        Load the necessary settings from the config file.

        Every option is read from the ``self.p_cfg_name`` section of
        ``self.p_settings`` and stored on the instance (``eff_y`` is stored as
        ``self.eff_conditions``).  The text file named by the ``files`` option
        is read line by line into ``self.p_files`` and one output file stem
        per line is derived in ``self.outputfilenames``.

        @return     The settings object (``self.p_settings``)
        """
        def option(key):
            """Raw value of `key` in the configuration section."""
            return self.p_settings.get(self.p_cfg_name,key)

        self.p_objects      = option('objects')
        self.p_scalefactor  = config.str2bool(option('eventweights'))
        self.p_lepton       = option('lepton')
        self.p_extra_save   = option('extra_saveAs')
        self.p_plot_type    = option('plot_framework')  # python/root
        self.p_plot1d       = option('1dplot')          # for saving data
        self.p_plot2d       = option('2dplot')          # for saving data
        self.p_cuts         = config.processCuts(option('cutsfile'))
        self.p_variables    = option('variables').split(',')
        self.p_eff_x_vars   = option('eff_x').split(',')
        self.p_treename     = option('treename')
        self.eff_conditions = option('eff_y').split(',')
        self.p_nevents      = int(option('NEvents'))

        # Read the list of input files inside a context manager so that the
        # handle is released immediately instead of whenever it gets collected.
        with open(option('files'),'r') as file_list:
            self.p_files = file_list.readlines()

        self.p_read_data    = config.str2bool(option('read_data'))

        # the literal string 'None' in the config means "no custom variables"
        p_custom_vars       = option('custom_variables')
        self.p_custom_vars  = '' if p_custom_vars == 'None' else p_custom_vars

        ## --  Easy variables to initialize  -- ##
        self.p_btag_wkpt     = info.btagging_WP(option('btag_wkpt'))

        # One output stem per input line: basename up to the first '.'.
        # The line is stripped first so that an entry without an extension
        # does not carry its trailing newline into the output file name.
        self.outputfilenames = []
        for fname in self.p_files:
            stem = fname.strip().split('/')[-1].split('.')[0]
            self.outputfilenames.append('data/'+stem+'_'+self.p_lepton+'_'+self.p_extra_save)

        return self.p_settings



    def execute(self):
        """
        Execute the code!

        Instantiates the two cut handlers (object level and event level) from
        the loaded settings and cuts, then hands control to FileLoop().
        """
        settings, cuts = self.p_settings, self.p_cuts

        # one handler per cut level, both configured from the same inputs
        self.object_level_cuts = ObjectCuts(settings, cuts)
        self.event_level_cuts = EventCuts(settings, cuts)

        self.FileLoop()



    def FileLoop(self):
        """
        Loop over files.

        For every entry of ``self.p_files`` the ROOT file is opened, the tree
        named ``self.p_treename`` is fetched, and the per-file sequence
        init_data -> EventLoop -> SaveData is run.  The index passed to
        init_data/SaveData is the position of the entry in ``self.p_files``.

        @raise IOError  If a listed file cannot be opened
        """
        for ff,file_entry in enumerate(self.p_files):
            filename = file_entry.strip()
            if not filename:
                # blank line in the file list (e.g. a trailing newline);
                # the index still advances so output names stay aligned
                continue

            self.f = ROOT.TFile.Open(filename)
            # TFile.Open hands back a null/zombie file on failure; stop with a
            # clear message instead of failing later on self.f.Get
            if not self.f or self.f.IsZombie():
                raise IOError("Could not open ROOT file '%s'" % filename)
            self.ttree = self.f.Get(self.p_treename)

            try:
                ## --  Initialize data for each file  -- ##
                self.init_data(ff)

                ## --  Loop over events  -- ##
                self.EventLoop()

                ## --    Output data     -- ##
                self.SaveData(ff)
            finally:
                # release the input file before moving on to the next one
                self.f.Close()

        return



    def EventLoop(self):
        """Loop over events."""
        total_entries = self.ttree.GetEntries()
        if self.p_nevents < 0 or self.p_nevents > total_entries:
            nEntries = total_entries
        else:
            nEntries = self.p_nevents
        entry = 0

        print
        print 
        print " RUNNING OVER ",self.f.GetName(),": ",nEntries," entries"
        print

        while entry < nEntries:

            self.ttree.GetEntry(entry)
            if not entry%1000: print " -> Entry ",entry

            ## -- Get some event info
            if self.ttree.ejets:  lep = 'el'
            else:                 lep = 'mu'
            if self.p_lepton!='muel' and lep!=self.p_lepton:
                entry+=1
                continue

            self.BuildObjects()

            ## -- Apply cuts (if necessary)
            failed_event = False
            if self.p_cuts:
                failed_event = self.ApplyCuts()

            if failed_event:
                entry+=1
                continue

            scaleFactor = self.getSF()

            if self.p_plot1d == 'efficiency':
                self.getEff(lep,scaleFactor)
            ## -- Loop over variables that we want
            else:
                custom_vars = addData.custom_variables(self.ttree,self.p_custom_vars,self.event_objects)
                for variable in self.p_variables:
                    try:
                        values    = custom_vars[variable]
                    except KeyError:
                        if '.' in variable:
                            name,attr = variable.split('.')
                            quantity  = self.key2attr[attr.lower()]
                            if type(self.event_objects[name])==list:
                                values = []
                                for py_object in self.event_objects[name]:
                                    values.append( self.getValue(py_object,quantity) )
                            else:
                                values = [ self.getValue(py_object,quantity) ]
                        else:
                            values = self.event_objects[variable]

                    ## -- If there are no values, no need to continue!
                    if not values:
                        continue
                    ## -- Keeping everything as a list for uniform interfaces
                    if type(values)==float:
                        values = [values]

                    scaleFactor = [scaleFactor for _ in values]
                    ## Scale by GeV for variables with that unit
                    if not any(x in variable for x in self.nonGeVvars):
                        values = [i/self.GeV for i in values]

                    ## saving the data to plot later ##
                    self.save_event(variable,values,scaleFactor,lep)


            entry+=1
            # -- end this entry -- #

        return



    def SaveData(self,ff):
        """Save data to json file or root file"""
        if self.use_hists:
            # root file
            self.outfiles[ff].Write()
            self.outfiles[ff].Close()
        else:
            # json
            json.dump(self.data, open(self.outputfilenames[ff] ,'w'))

        return



    def save_json(self,json_var,json_values,json_weight,json_leptonflavor):
        """"""
#         if ',' not in json_var:
#             json_values = json_values[0]
        self.data['mcChannelNumber'] = self.ttree.mcChannelNumber
        self.data[json_var][json_leptonflavor]['value'].append(json_values)  
        self.data[json_var][json_leptonflavor]['weight'].append(json_weight)

        return



    def save_hist(self,r_var,r_values,r_weight,r_leptonflavor):
        """"""
        self.data['mcChannelNumber'] = self.ttree.mcChannelNumber # setting this repeatedly..
        if ',' in r_var:
            self.data[r_var][r_leptonflavor].Fill(r_values[0],r_values[1],r_weight)
        else:
            self.data[r_var][r_leptonflavor].Fill(r_values[0],r_weight)

        return



    def ApplyCuts(self):
        """Apply event- and object-level cuts"""
        obj_cuts = self.object_level_cuts.applyCuts(self.event_objects)
        if not obj_cuts['result']:
            return True # event failed

        self.event_objects = obj_cuts['objects']  # update the dictionary of objects

        evt_cuts = self.event_level_cuts.applyCuts(self.event_objects)
        if not evt_cuts['result']:
            return True # event failed

        self.event_objects = evt_cuts['objects']  # update the dictionary of objects

        return False # event passed



    def getSF(self):
        """Retrieve event weight"""
        sf = 1.
        if self.ttree.mcChannelNumber>0 and self.p_scalefactor:
            ew   = vlq.compute_weight(self.ttree,self.p_btag_wkpt)
            sf  *= ew

        return sf



    def BuildObjects(self):
        """
        Build 4-vectors of objects in event:
        lepton,met,nu,jets,tjets,bjets,fatjets,rcjets,resjets

        The objects listed in ``self.p_objects`` (comma separated) are built
        from the current tree entry and stored in ``self.event_objects``.
        Lists of objects are sorted so that index 0 has the highest pT.
        Besides the requested objects the dictionary always holds
        'vlq_evtype', 'bjets' and 'resjets' (empty lists when not built),
        'nbtags' when jets were built, and 'ht' when requested as a variable.
        """
        def by_pt(candidate):
            """Sort key: transverse momentum of the object."""
            return candidate.Pt()

        objects = self.p_objects.split(',')
        # make 'resjets' be the last thing built (need the jets first)
        if 'resjets' in objects and objects[-1]!='resjets':
            objects.remove('resjets')
            objects.append('resjets')

        ## possible objects linked to their functions
        import_keys = {'lepton':  lepBase,
                       'met':     metBase,
                       'nu':      nuBase,
                       'jets':    jetBase,
                       'bjets':   bjetBase,
                       'tjets':   tjetBase,
                       'fatjets': fjBase,
                       'rcjets':  rcBase,
                       'resjets': resBase}

        self.event_objects = {'vlq_evtype':self.ttree.vlq_evtype}

        ## special treatment for truth information because of how it is saved in truthSelection
        if any(ob.startswith('truth') for ob in objects) or self.p_plot1d=='efficiency':
            self.event_objects.update(truthBase(self.ttree))
            # Just set it up, done with truth information.  Filtering (rather
            # than list.remove('truth')) also works when truth was triggered
            # by efficiency mode or by a 'truth...' name, neither of which has
            # an entry in import_keys.
            objects = [ob for ob in objects if not ob.startswith('truth')]

        for event_object in objects:
            if event_object in ('bjets+jets','jets+bjets'):
                # merge the objects into one list
                jets = jetBase(self.ttree)+bjetBase(self.ttree)
                jets.sort(key=by_pt, reverse=True) # [0] has the highest pT
                self.event_objects['jets'] = jets
            elif event_object == 'resjets':
                obj = import_keys[event_object](self.event_objects['jets'])
                obj.sort(key=by_pt, reverse=True)  # [0] has the highest pT
                self.event_objects[event_object] = obj
            else:
                obj = import_keys[event_object](self.ttree)
                self.event_objects[event_object] = obj
                try:
                    obj.sort(key=by_pt, reverse=True) # [0] has the highest pT
                except AttributeError:
                    # not a sortable list of objects with Pt(); keep as built
                    pass

        # Count b-tags whenever a 'jets' collection exists.  Checking the
        # built objects (instead of the requested names) covers 'jets+bjets'
        # and avoids a KeyError when only 'bjets' was requested.
        if 'jets' in self.event_objects:
            self.event_objects['nbtags'] = len( [q for q in self.event_objects['jets'] if q.mv2c20>self.p_btag_wkpt] )

        # Defaults for collections that were not requested; collections that
        # were built above must not be overwritten.
        if 'bjets' not in objects:
            self.event_objects['bjets'] = []
        if 'resjets' not in objects:
            self.event_objects['resjets'] = []

        if 'ht' in self.p_variables:
            self.event_objects['ht'] = vlq.calcHT(self.event_objects)

        return



    def init_data(self,f_ind):
        # self.p_variables
        self.data = {}

        self.use_hists = False
        self.use_json  = True
        
        if self.p_plot_type == 'python':
            if self.p_plot1d == 'efficiency':
                self.use_hists = True
            else:
                self.use_json  = True
        elif self.p_plot_type == 'root':
            self.use_hists = True
        else:
            print " UNKNOWN PLOT TYPE. EXITING "
            import sys
            sys.exit(1)


        # putting for loop inside this if statement because I don't want to check it every time
        if self.use_hists:
            import pyDataMC.datamc_dicts as datamc_dicts
            self.outfiles = [ROOT.TFile(of+'.root',"recreate") for of in self.outputfilenames]
            plot_keys     = datamc_dicts.text_dicts()
            n_bins        = 1000
            for eff_c,var in zip(self.eff_conditions,self.p_variables):
                eff_c = re.search(r'\d+',eff_c).group()
                # check for 2d histogram
                if ',' in var:
                    vars       = var.split(',')
                    ttree_vars = []
                    bins       = []
                    for var in vars:
                        ttree_name = self.variablename2ttreename(var)
                        bins.append(array('d',plot_keys['variables'][ttree_name]['bins']))
                    self.data[var] = {'mu':ROOT.TH2D(var+'_mu_name',var+'_mu_title', n_bins,bins[0],n_bins,bins[1] ),\
                                      'el':ROOT.TH2D(var+'_el_name',var+'_el_title', n_bins,bins[0],n_bins,bins[1] )}
                # else it's a 1d histogram
                else:
                    try:
                        bins = np.asarray(plot_keys['variables'][self.p_eff_x_vars[0]]['bins'])
                    except KeyError:
                        v_name     = var.split('_')
                        ttree_name = self.obj2ttree[v_name[0]]+'_'+self.key2attr[v_name[1]]
                        bins       = np.asarray(plot_keys['variables'][ttree_name]['bins'])
                    if self.p_plot1d == 'efficiency':
                        self.data[var+eff_c] = {'el':
                                         {'total':ROOT.TH1D(var+'_el_name_t_'+eff_c,var+'_el_title_t_'+eff_c, n_bins,bins[0],bins[-1] ),\
                                          'good': ROOT.TH1D(var+'_el_name_g_'+eff_c,var+'_el_title_g_'+eff_c, n_bins,bins[0],bins[-1] ),\
                                          'mis':  ROOT.TH1D(var+'_el_name_m_'+eff_c,var+'_el_title_m_'+eff_c, n_bins,bins[0],bins[-1] )},\
                                          'mu':
                                          {'total':ROOT.TH1D(var+'_mu_name_t_'+eff_c,var+'_mu_title_t_'+eff_c, n_bins,bins[0],bins[-1] ),\
                                          'good':  ROOT.TH1D(var+'_mu_name_g_'+eff_c,var+'_mu_title_g_'+eff_c, n_bins,bins[0],bins[-1] ),\
                                          'mis':   ROOT.TH1D(var+'_mu_name_m_'+eff_c,var+'_mu_title_m_'+eff_c, n_bins,bins[0],bins[-1] )}}
                    else:
                        self.data[var] = {'el':ROOT.TH1D(var+'_el_name',var+'_el_title', n_bins,bins ),\
                                          'mu':ROOT.TH1D(var+'_mu_name',var+'_mu_title', n_bins,bins )}
        elif self.use_json:
            self.save_event      = self.save_json
            self.outputfilenames = self.outputfilenames[f_ind]+'.json'
            for var in self.p_variables:
                self.data[var] = {'el':{'value':[],'weight':[]},\
                                  'mu':{'value':[],'weight':[]}} # can support 2d (use [a,b])
        else:
            print " NEED JSON OR ROOT OUTPUT. EXITING "
            sys.exit(1)

        return



    def getEff(self,emu,event_weight):
        """
        Get efficiency and save to histogram.

        For every combination of x-variable (``self.p_eff_x_vars``) and
        variable (``self.p_variables``, named ``<object>_<attribute>``) each
        reconstructed object that lies within the configured deltaR of its
        truth object fills the 'total' histogram, and then 'good' or 'mis'
        depending on whether its attribute satisfies the matching ``eff_y``
        condition.

        @param emu              Lepton flavor key of the histograms ('el'/'mu')
        @param event_weight     Weight used for every fill
        """
        custom_vars    = addData.custom_variables(self.ttree,self.p_custom_vars,self.event_objects)
        eff_conditions = self.p_settings.get(self.p_cfg_name,'eff_y').split(',') # things like ==1

        # split every condition into its operator and the number to compare to
        logic_operator = []
        good_condition = []
        for eff_c in eff_conditions:
            value = re.search(r'\d+',eff_c).group()  # '==1' -> 1
            logic_operator.append( eff_c.split(value)[0] )  # '==1' -> '=='
            if eff_c.endswith('.'):
                good_condition.append( float(value) )
            else:
                good_condition.append( int(value) )

        # self.p_eff_x_vars # loop variables that we are getting the efficiency as a function of!
        # self.p_variables  # loop variables that we are getting the efficiency of!

        for eff_x_var in self.p_eff_x_vars:
            if eff_x_var in custom_vars:
                eff_x_val = custom_vars[eff_x_var]
            else:
                eff_x_val = None
                # set it somewhere else; user needs to do this! (too modular...)
                # something like the object pT (below) or another object's pT (define here)

            for vv,variable in enumerate(self.p_variables):
                # same suffix that init_data uses for the histogram key
                eff_c = re.search(r'\d+',self.eff_conditions[vv]).group()  # '==1' -> 1

                reco_name,attr = variable.split('_')
                quantity       = self.key2attr[attr.lower()]
                truth_name     = self.truthObj[reco_name]['name']
                dr_match       = self.truthObj[reco_name]['dr']  # deltaR match value
                reco_object    = self.event_objects[reco_name]
                truth_object   = self.event_objects[truth_name]

                # -- DeltaR match and fill hists
                #    a single object is handled as a one-element list so that
                #    both cases share the same code
                if isinstance(reco_object,list):
                    reco_objects = reco_object
                else:
                    reco_objects = [reco_object]

                for reco in reco_objects:
                    if not reco.DeltaR(truth_object) < dr_match:
                        continue
                    result = self.getValue(reco,quantity)
                    hists  = self.data[variable+eff_c][emu]
                    # fill total (matched)
                    hists['total'].Fill(eff_x_val,event_weight)
                    if self.ops[logic_operator[vv]](result,good_condition[vv]):
                        # fill good  (matched & tagged)
                        hists['good'].Fill(eff_x_val,event_weight)
                    else:
                        # fill mis   (matched & mis-tagged)
                        hists['mis'].Fill(eff_x_val,event_weight)

        return




    def getValue(self,g_value,g_quant):
        """"""
        try:
            g_result = getattr( g_value,g_quant )()
        except:
            g_result = getattr( g_value,g_quant )

        return g_result



    def varname2ttreename(self,variable_name):
        """
        The ttree branches and objects in PyMiniAna aren't the same name.
        I should probably fix this at some point...
        This function converts the object name to the ttree name.
        """
        if variablename.split('.')[0] in self.obj2ttree.keys():
            v_name      = variablename.split('.')
            first_name  = self.obj2ttree[v_name[0]]  # should be correct
            second_name = self.key2attr[v_name[1]]   # may need to be lower-case
            ttree_name  = first_name+'_'+second_name
            try:
                v_dict  = self.plot_keys['variables'][ttree_name]
            except KeyError:
                ttree_name = first_name+'_'+second_name.lower()
        else:
            ttree_name = variablename

        return ttree_name