예제 #1
0
    def create(self):
        """Creates all necessary config files and runs the Ntuple production -- either locally or on the batch system

        Builds the output directory tree, then writes one production config /
        log / output-file name per (sample, jobId), and finally creates the
        sbatch submission script (batch mode) and the Makefile driving the jobs.
        """

        # self.dirs maps either key -> path or key -> { dir_type -> path };
        # create every directory that does not exist yet
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            # skip samples that are disabled or handled by dedicated workflows
            if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                continue

            process_name = sample_info["process_name_specific"]

            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_prodNtuple, process_name))

            inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
            for jobId in inputFileList.keys():

                key_dir = getKey(sample_name)
                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = inputFileList[jobId]
                if len(self.inputFiles[key_file]) == 0:
                    # was a bare Python-2 'print'; route the warning through
                    # logging like every other status message in this method
                    logging.warning("ntupleFiles['%s'] = %s --> skipping job !!" % (key_file, self.inputFiles[key_file]))
                    continue
                # the Ntuple production expects exactly one input file per job
                assert(len(self.inputFiles[key_file]) == 1), "There is more than one input file!"
                self.cfgFiles_prodNtuple_modified[key_file] = os.path.join(self.dirs[key_dir][DKEY_CFGS], "produceNtuple_%s_%s_%i_cfg.py" % \
                  (self.channel, process_name, jobId))
                self.outputFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_NTUPLES], "%s_%i.root" % \
                  (process_name, jobId))
                self.logFiles_prodNtuple[key_file] = os.path.join(self.dirs[key_dir][DKEY_LOGS], "produceNtuple_%s_%s_%i.log" % \
                  (self.channel, process_name, jobId))
                self.createCfg_prodNtuple(self.inputFiles[key_file], self.outputFiles[key_file], self.era, self.cfgFiles_prodNtuple_modified[key_file])

        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_prodNtuple)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_prodNtuple(lines_makefile)
        #self.addToMakefile_clean(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
예제 #2
0
  def __init__(self, outputDir, executable_analyze, samples, lepton_selections, central_or_shifts,
               max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs,
               histograms_to_fit = None, select_rle_output = False, executable_prep_dcard="prepareDatacard"):
    """Configuration for the charge-flip measurement workflow.

    Args:
      outputDir: base directory for all files produced by this workflow
      executable_analyze: name of the analysis executable to run
      samples: dictionary of sample definitions, keyed by sample name
      lepton_selections: list of lepton selection criteria to process
      histograms_to_fit: names of histograms used for signal extraction
        (defaults to an empty list)
      select_rle_output: if True, also write run:lumi:event numbers of
        selected events
    """
    # Guard against the shared mutable-default-argument pitfall: the previous
    # default of [] would be shared between all instances.
    if histograms_to_fit is None:
      histograms_to_fit = []
    analyzeConfig.__init__(self, outputDir, executable_analyze, "charge_flip", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs,
      histograms_to_fit)

    self.samples = samples

    self.prep_dcard_processesToCopy = ["data_obs", "DY", "DY_fake", "WJets", "TTbar", "Singletop", "Diboson"]
    self.prep_dcard_signals = [ "DY" ]

    self.lepton_selections = lepton_selections

    # one (cfg, histogram, log, datacard, rle) directory per sample x lepton selection
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_selection in self.lepton_selections:
          key_dir = getKey(sample_name, lepton_selection)
          for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_RLES  ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
              "_".join([ lepton_selection ]), process_name)

    self.cfgFile_analyze_original = os.path.join(self.workingDir, "analyze_charge_flip_cfg.py")
    self.cfgFile_prep_dcard_original = os.path.join(self.workingDir, "prepareDatacards_cfg.py")
    self.select_rle_output = select_rle_output
예제 #3
0
    def __init__(self, treeName, outputDir, executable_addMEM, samples, era, debug, running_method,
                 max_files_per_job, mem_integrations_per_job, max_mem_integrations, num_parallel_jobs):
        """Configuration for adding MEM (matrix-element-method) scores in the 2lss_1tau channel.

        Args:
          treeName: name of the input TTree
          outputDir: base directory for all files produced by this workflow
          executable_addMEM: name of the executable that computes the MEM scores
          samples: dictionary of sample definitions, keyed by sample name
          running_method: either "sbatch" (batch system) or "makefile" (local)
          mem_integrations_per_job / max_mem_integrations: job-splitting limits
        """
        self.treeName = treeName
        self.outputDir = outputDir
        self.executable_addMEM = executable_addMEM
        self.channel = "2lss_1tau"
        self.mem_integrations_per_job = mem_integrations_per_job
        self.max_files_per_job = max_files_per_job
        self.max_mem_integrations = max_mem_integrations
        self.samples = samples
        self.era = era
        self.debug = debug
        assert(running_method.lower() in [
          "sbatch", "makefile"]), "Invalid running method: %s" % running_method
        self.running_method = running_method
        self.is_sbatch = False
        self.is_makefile = False
        if self.running_method.lower() == "sbatch":
            self.is_sbatch = True
        else:
            self.is_makefile = True
        self.makefile = os.path.join(
          self.outputDir, "Makefile_%s" % self.channel)
        self.num_parallel_jobs = num_parallel_jobs

        self.workingDir = os.getcwd()
        # parenthesized so the statement is valid in both Python 2 and 3
        print("Working directory is: " + self.workingDir)

        create_if_not_exists(self.outputDir)
        # capture stdout/stderr of the jobs in per-channel log files
        self.stdout_file = codecs.open(os.path.join(
          self.outputDir, "stdout_%s.log" % self.channel), 'w', 'utf-8')
        self.stderr_file = codecs.open(os.path.join(
          self.outputDir, "stderr_%s.log" % self.channel), 'w', 'utf-8')
        self.dirs = {}
        # NOTE: a second, redundant 'self.samples = samples' assignment was removed here
        self.cfgFile_addMEM_original = os.path.join(self.workingDir, "addMEM_2lss_1tau_cfg.py")
        self.cfgFiles_addMEM_modified = {}
        self.logFiles_addMEM = {}
        self.sbatchFile_addMEM = os.path.join(
          self.outputDir, "sbatch_addMEM_%s.py" % self.channel)
        self.inputFiles = {}
        self.outputFiles = {}
        self.hadd_records = {}
        self.filesToClean = []

        # one output directory per (sample, directory type)
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [ DKEY_CFGS, DKEY_NTUPLES, DKEY_FINAL_NTUPLES, DKEY_LOGS, DKEY_HADD ]:
                initDict(self.dirs, [ key_dir, dir_type ])
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, process_name)

        # collects CVMFS-related job failures, keyed for later reporting
        self.cvmfs_error_log = {}
예제 #4
0
  def createCfg_prep_dcard(self, histogramToFit, histogramDir = None, label = None):
    """Fills the template of python configuration file for datacard preparation

    Args:
      histogramToFit: name of the histogram used for signal extraction
    """
    datacardFile = os.path.join(self.outputDir, DKEY_DCRD, "prepareDatacards_%s_%s.root" % (self.channel, histogramToFit))
    category_output = self.channel
    cfgFile_modified = os.path.join(self.outputDir, DKEY_CFGS, "prepareDatacards_%s_%s_cfg.py" % (self.channel, histogramToFit))
    key = histogramToFit
    if not histogramDir:
        histogramDir = self.histogramDir_prep_dcard
        if label:
            datacardFile = datacardFile.replace(channel, "%s_%s" % (channel, label))
            category_output += "_%s" % label
            cfgFile_modified = cfgFile_modified.replace("_cfg.py", "_%s_cfg.py" % label)
            key = getKey(histogramToFit, label)
    lines = []
    lines.append("process.fwliteInput.fileNames = cms.vstring('%s')" % self.histogramFile_hadd_stage2)
    lines.append("process.fwliteOutput.fileName = cms.string('%s')" % datacardFile)
    lines.append("process.prepareDatacards.processesToCopy = cms.vstring(%s)" % self.prep_dcard_processesToCopy)
    lines.append("process.prepareDatacards.signals = cms.vstring(%s)" % self.prep_dcard_signals)
    lines.append("process.prepareDatacards.makeSubDir = cms.bool(True)")
    lines.append("process.prepareDatacards.categories = cms.VPSet(")
    for charge in ["OS", "SS"]:
	for ptEtaBin in ["BB_LL", "BB_ML", "BB_MM", "BB_HL", "BB_HM", "BB_HH", "EE_LL", "EE_ML", "EE_MM", "EE_HL", "EE_HM", "EE_HH", "BE_LL", "BE_ML", "EB_ML", "BE_MM", "BE_HL", "EB_HL", "BE_HM", "EB_HM", "BE_HH", "total"]:
	    lines.append("    cms.PSet(")
	    lines.append("        input = cms.string('%s/%s')," % (charge, ptEtaBin))
	    lines.append("        output = cms.string('ttH_%s_%s_%s')" % (self.channel, charge, ptEtaBin))
	    lines.append("    ),")
    lines.append(")")
    lines.append("process.prepareDatacards.histogramToFit = cms.string('%s')" % histogramToFit)
    lines.append("""process.prepareDatacards.sysShifts = cms.vstring(
            "CMS_ttHl_electronESBarrelUp",
        	"CMS_ttHl_electronESBarrelDown",
        	"CMS_ttHl_electronESEndcapUp",
	        "CMS_ttHl_electronESEndcapDown",
	        "CMS_ttHl_electronERUp",
	        "CMS_ttHl_electronERDown") """
    )
    create_cfg(self.cfgFile_prep_dcard_original, cfgFile_modified, lines)
    self.datacardFiles[key] = datacardFile
    self.cfgFile_prep_dcard_modified[key] = cfgFile_modified
예제 #5
0
    def createCfg_prep_dcard(self, histogramToFit, histogramDir=None, label=None):
        """Fills the template of python configuration file for datacard preparation

        Args:
          histogramToFit: name of the histogram used for signal extraction
          histogramDir: histogram directory; defaults to self.histogramDir_prep_dcard
          label: optional label appended to the datacard / cfg file names
        """
        datacardFile = os.path.join(
            self.outputDir, DKEY_DCRD, "prepareDatacards_%s_%s.root" % (self.channel, histogramToFit))
        category_output = self.channel
        cfg_file_modified = os.path.join(
            self.outputDir, DKEY_CFGS, "prepareDatacards_%s_%s_cfg.py" % (self.channel, histogramToFit))
        key = histogramToFit
        if not histogramDir:
            histogramDir = self.histogramDir_prep_dcard
            if label:
                # fixed: 'channel' was an undefined name here (NameError);
                # use the instance attribute as done for the file names above
                datacardFile = datacardFile.replace(
                    self.channel, "%s_%s" % (self.channel, label))
                category_output += "_%s" % label
                cfg_file_modified = cfg_file_modified.replace(
                    "_cfg.py", "_%s_cfg.py" % label)
                key = getKey(histogramToFit, label)
        lines = []
        lines.append("process.fwliteInput.fileNames = cms.vstring('%s')" %
                     self.histogramFile_hadd_stage2)
        lines.append(
            "process.fwliteOutput.fileName = cms.string('%s')" % datacardFile)
        lines.append("process.prepareDatacards.processesToCopy = cms.vstring(%s)" %
                     self.prep_dcard_processesToCopy)
        lines.append("process.prepareDatacards.signals = cms.vstring(%s)" %
                     self.prep_dcard_signals)
        lines.append("process.prepareDatacards.makeSubDir = cms.bool(False)")
        lines.append("process.prepareDatacards.categories = cms.VPSet(")
        lines.append("    cms.PSet(")
        lines.append("        input = cms.string('%s/sel/evt')," %
                     self.histogramDir_prep_dcard)
        lines.append("        output = cms.string('ttH_%s')" % category_output)
        lines.append("    )")
        lines.append(")")
        lines.append(
            "process.prepareDatacards.histogramToFit = cms.string('%s')" % histogramToFit)
        create_cfg(self.cfgFile_prep_dcard_original, cfg_file_modified, lines)
        self.datacardFiles[key] = datacardFile
        self.cfgFile_prep_dcard_modified[key] = cfg_file_modified
  def __init__(self, outputDir, executable_analyze, samples, charge_selections,
               jet_minPt, jet_maxPt, jet_minAbsEta, jet_maxAbsEta, hadTau_selections, absEtaBins, ptBins, central_or_shifts,
               max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs,
               executable_comp_jetToTauFakeRate):
    """Set up the jet->tau fake-rate measurement workflow."""
    analyzeConfig.__init__(self, outputDir, executable_analyze, "jetToTauFakeRate", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs,
      [])

    self.samples = samples
    self.charge_selections = charge_selections

    # kinematic range of the jets entering the measurement
    self.jet_minPt = jet_minPt
    self.jet_maxPt = jet_maxPt
    self.jet_minAbsEta = jet_minAbsEta
    self.jet_maxAbsEta = jet_maxAbsEta

    self.hadTau_selections = hadTau_selections

    # binning of the fake-rate measurement
    self.absEtaBins = absEtaBins
    self.ptBins = ptBins

    self.executable_comp_jetToTauFakeRate = executable_comp_jetToTauFakeRate

    # one output directory per (sample, charge selection, directory type)
    for sample_name, sample_entry in self.samples.items():
      skip_sample = not sample_entry["use_it"] or \
                    sample_entry["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]
      if skip_sample:
        continue
      sample_process = sample_entry["process_name_specific"]
      for charge_sel in self.charge_selections:
        dir_key = getKey(sample_name, charge_sel)
        for dtype in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD ]:
          initDict(self.dirs, [ dir_key, dtype ])
          self.dirs[dir_key][dtype] = os.path.join(
            self.outputDir, dtype, self.channel, "_".join([ charge_sel ]), sample_process)

    # templates and per-job bookkeeping for the fake-rate computation step
    self.cfgFile_analyze_original = os.path.join(self.workingDir, "analyze_jetToTauFakeRate_cfg.py")
    self.histogramFile_comp_jetToTauFakeRate = {}
    self.cfgFile_comp_jetToTauFakeRate_original = os.path.join(self.workingDir, "comp_jetToTauFakeRate_cfg.py")
    self.cfgFile_comp_jetToTauFakeRate_modified = {}
    self.histogramDir_numerator = {}
    self.histogramDir_denominator = {}
예제 #7
0
 def addToMakefile_hadd_stage1(self, lines_makefile):
   """Append Makefile rules that merge (hadd) the per-job stage-1 histograms.

   Three levels of merging are emitted: per (sample, lepton selection,
   systematic shift), then per sample, then one final stage-1 file.
   Every intermediate file is also scheduled for cleanup via self.filesToClean.

   Args:
     lines_makefile: list of Makefile lines, extended in place
   """
   inputFiles_hadd_stage1 = []
   for sample_name, sample_info in self.samples.items():
     # membership test directly on the dict instead of materializing .keys()
     if sample_name not in self.inputFileIds:
       continue
     process_name = sample_info["process_name_specific"]
     inputFiles_sample = []
     for lepton_selection in self.lepton_selections:
       for central_or_shift in self.central_or_shifts:
         # Electron energy-resolution shifts are only applied to DY samples
         if ("DY" not in process_name) and "CMS_ttHl_electronER" in central_or_shift: continue
         inputFiles_jobIds = []
         for jobId in range(len(self.inputFileIds[sample_name])):
           key_file = getKey(sample_name, lepton_selection, central_or_shift, jobId)
           if key_file in self.histogramFiles:
             inputFiles_jobIds.append(self.histogramFiles[key_file])
         if len(inputFiles_jobIds) > 0:
           haddFile_jobIds = self.histogramFile_hadd_stage1.replace(".root", "_%s_%s_%s.root" % \
             (process_name, lepton_selection, central_or_shift))
           lines_makefile.append("%s: %s" % (haddFile_jobIds, " ".join(inputFiles_jobIds)))
           lines_makefile.append("\t%s %s" % ("rm -f", haddFile_jobIds))
           lines_makefile.append("\t%s %s %s" % ("hadd", haddFile_jobIds, " ".join(inputFiles_jobIds)))
           lines_makefile.append("")
           inputFiles_sample.append(haddFile_jobIds)
           self.filesToClean.append(haddFile_jobIds)
     if len(inputFiles_sample) > 0:
       haddFile_sample = self.histogramFile_hadd_stage1.replace(".root", "_%s.root" % process_name)
       lines_makefile.append("%s: %s" % (haddFile_sample, " ".join(inputFiles_sample)))
       lines_makefile.append("\t%s %s" % ("rm -f", haddFile_sample))
       lines_makefile.append("\t%s %s %s" % ("hadd", haddFile_sample, " ".join(inputFiles_sample)))
       lines_makefile.append("")
       inputFiles_hadd_stage1.append(haddFile_sample)
       self.filesToClean.append(haddFile_sample)
   # final stage-1 merge over all samples
   lines_makefile.append("%s: %s" % (self.histogramFile_hadd_stage1, " ".join(inputFiles_hadd_stage1)))
   lines_makefile.append("\t%s %s" % ("rm -f", self.histogramFile_hadd_stage1))
   lines_makefile.append("\t%s %s %s" % ("hadd", self.histogramFile_hadd_stage1, " ".join(inputFiles_hadd_stage1)))
   lines_makefile.append("")
   self.filesToClean.append(self.histogramFile_hadd_stage1)
예제 #8
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          central_or_shifts_extended = [ "" ]
          central_or_shifts_extended.extend(self.central_or_shifts)
          central_or_shifts_extended.extend([ "hadd", "addBackgrounds" ])
          for central_or_shift_or_dummy in central_or_shifts_extended:
            process_name_extended = [ process_name, "hadd" ]
            for process_name_or_dummy in process_name_extended:
              key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, central_or_shift_or_dummy)
              for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_RLES, DKEY_SYNC ]:
                initDict(self.dirs, [ key_dir, dir_type ])
                if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                  self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                    "_".join([ lepton_selection_and_frWeight ]), process_name_or_dummy, central_or_shift_or_dummy)
                else:
                  self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                    "_".join([ lepton_selection_and_frWeight ]), process_name_or_dummy, central_or_shift_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')
    for lepton_selection in self.lepton_selections:
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      hadTauVeto_selection = "Tight"
      hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])

      if lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for sample_name, sample_info in self.samples.items():
          if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
          process_name = sample_info["process_name_specific"]
          logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

          sample_category = sample_info["sample_category"]
          is_mc = (sample_info["type"] == "mc")
          is_signal = (sample_category == "signal")

          for central_or_shift in self.central_or_shifts:

            inputFileList = inputFileLists[sample_name]
            for jobId in inputFileList.keys():
              if central_or_shift != "central":
                isFR_shape_shift = (central_or_shift in systematics.FR_all)
                if not ((lepton_selection == "Fakeable" and isFR_shape_shift) or lepton_selection == "Tight"):
                  continue
                if not is_mc and not isFR_shape_shift:
                  continue

              if central_or_shift in systematics.LHE().ttH and sample_category != "signal":
                continue
              if central_or_shift in systematics.LHE().ttW and sample_category != "TTW":
                continue
              if central_or_shift in systematics.LHE().ttZ and sample_category != "TTZ":
                continue
              if central_or_shift in systematics.DYMCReweighting and not is_dymc_reweighting(sample_name):
                continue

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, central_or_shift)
              analyze_job_tuple = (process_name, lepton_selection_and_frWeight, central_or_shift, jobId)
              key_analyze_job = getKey(*analyze_job_tuple)
              ntupleFiles = inputFileList[jobId]
              if len(ntupleFiles) == 0:
                logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                continue

              syncOutput = ''
              syncTree = ''
              syncRequireGenMatching = True
              if self.do_sync:
                mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                if lepton_selection_and_frWeight == 'Tight':
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                  syncTree = 'syncTree_%s_SR' % self.channel
                  syncRequireGenMatching = True
                elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights':
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                  syncTree = 'syncTree_%s_Fake' % self.channel
                elif mcClosure_match:
                  mcClosure_type = mcClosure_match.group('type')
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                  syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel, mcClosure_type)
                else:
                  continue
              if syncTree and central_or_shift != "central":
                syncTree = os.path.join(central_or_shift, syncTree)
              syncRLE = ''
              if self.do_sync and self.rle_select:
                syncRLE = self.rle_select % syncTree
                if not os.path.isfile(syncRLE):
                  logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                  continue
              if syncOutput:
                self.inputFiles_sync['sync'].append(syncOutput)

              cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
              logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
              rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                   if self.select_rle_output else ""
              histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)

              self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles'              : ntupleFiles,
                'cfgFile_modified'         : cfgFile_modified_path,
                'histogramFile'            : histogramFile_path,
                'logFile'                  : logFile_path,
                'selEventsFileName_output' : rleOutputFile_path,
                'electronSelection'        : electron_selection,
                'muonSelection'            : muon_selection,
                'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                'hadTauSelection_veto'     : hadTauVeto_selection,
                'applyFakeRateWeights'     : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                'central_or_shift'         : central_or_shift,
                'syncOutput'               : syncOutput,
                'syncTree'                 : syncTree,
                'syncRLE'                  : syncRLE,
                'syncRequireGenMatching'   : syncRequireGenMatching,
                'useNonNominal'            : self.use_nonnominal,
                'apply_hlt_filter'         : self.hlt_filter,
              }
              self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

              # initialize input and output file names for hadd_stage1
              key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight)
              hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight)
              key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
              if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
              self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
              self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                              "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)

          if self.do_sync: continue

          if is_mc:
            logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)

            sample_categories = [ sample_category ]
            if is_signal:
              sample_categories = [ "signal", "ttH", "ttH_htt", "ttH_hww", "ttH_hzz", "ttH_hmm", "ttH_hzg" ]
            for sample_category in sample_categories:
              # sum non-fake and fake contributions for each MC sample separately
              genMatch_categories = [ "nonfake", "conversions", "fake" ]
              for genMatch_category in genMatch_categories:
                key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
                key_addBackgrounds_dir = getKey(process_name, lepton_selection_and_frWeight, "addBackgrounds")
                addBackgrounds_job_tuple = None
                processes_input = None
                process_output = None
                if genMatch_category == "nonfake":
                  # sum non-fake contributions for each MC sample separately
                  # input processes: TT3l0g0j,...
                  # output processes: TT; ...
                  if sample_category in [ "signal" ]:
                    lepton_genMatches = []
                    lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                    lepton_genMatches.extend(self.lepton_genMatches_fakes)
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in lepton_genMatches ]
                  elif sample_category in [ "ttH" ]:
                    lepton_genMatches = []
                    lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                    processes_input = []
                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in lepton_genMatches ])
                  else:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_nonfakes ]
                  process_output = sample_category
                  addBackgrounds_job_tuple = (process_name, sample_category, lepton_selection_and_frWeight)
                elif genMatch_category == "conversions":
                  # sum fake contributions for each MC sample separately
                  # input processes: TT2l1g0j, TT1l2g0j, TT0l3g0j; ...
                  # output processes: TT_conversion; ...
                  if sample_category in [ "signal" ]:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_conversions ]
                  elif sample_category in [ "ttH" ]:
                    processes_input = []
                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                  else:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_conversions ]
                  process_output = "%s_conversion" % sample_category
                  addBackgrounds_job_tuple = (process_name, "%s_conversion" % sample_category, lepton_selection_and_frWeight)
                elif genMatch_category == "fake":
                  # sum fake contributions for each MC sample separately
                  # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l2g1j, TT0l1g2j, TT0l0g3j; ...
                  # output processes: TT_fake; ...
                  if sample_category in [ "signal" ]:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                  elif sample_category in [ "ttH" ]:
                    processes_input = []
                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                  else:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                  process_output = "%s_fake" % sample_category
                  addBackgrounds_job_tuple = (process_name, "%s_fake" % sample_category, lepton_selection_and_frWeight)
                if processes_input:
                  logging.info(" ...for genMatch option = '%s'" % genMatch_category)
                  key_addBackgrounds_job = getKey(*addBackgrounds_job_tuple)
                  cfgFile_modified = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_tuple)
                  outputFile = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_tuple)
                  self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job],
                    'cfgFile_modified' : cfgFile_modified,
                    'outputFile' : outputFile,
                    'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
                    'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
                    'processes_input' : processes_input,
                    'process_output' : process_output
                  }
                  self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                  # initialize input and output file names for hadd_stage1_5
                  key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight)                  
                  key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
                  if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                  self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                  self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                                      "hadd_stage1_5_%s.root" % lepton_selection_and_frWeight)

          # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
          if not is_mc:
            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
            key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

        if self.do_sync: continue

        # sum fake background contributions over all MC samples
        # input processes: TT_fake, ...; signal_fake
        # output process: fakes_mc
        key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
        key_addBackgrounds_dir = getKey("addBackgrounds")
        addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight)
        key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
        sample_categories = []
        sample_categories.extend(self.nonfake_backgrounds)
        sample_categories.extend([ "signal" ])
        processes_input = []
        for sample_category in sample_categories:
          processes_input.append("%s_fake" % sample_category)
        self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
          'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_job_fakes_tuple),
          'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_job_fakes_tuple),
          'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
          'processes_input' : processes_input,
          'process_output' : "fakes_mc"
        }
        self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

        # sum conversion background contributions over all MC samples
        # input processes: TT_conversion, ...; signal_conversion
        # output process: conversions
        addBackgrounds_job_conversions_tuple = ("conversions", lepton_selection_and_frWeight)
        key_addBackgrounds_job_conversions = getKey(*addBackgrounds_job_conversions_tuple)
        sample_categories = []
        sample_categories.extend(self.nonfake_backgrounds)
        sample_categories.extend([ "signal" ])
        processes_input = []
        for sample_category in sample_categories:
          processes_input.append("%s_conversion" % sample_category)
        self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_job_conversions_tuple),
          'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_job_conversions_tuple),
          'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_job_conversions_tuple),
          'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
          'processes_input' : processes_input,
          'process_output' : "conversions"
        }
        self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions])

        # initialize input and output file names for hadd_stage2
        key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
        key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight)        
        key_hadd_stage2_job = getKey(lepton_selection_and_frWeight)
        if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
          self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
        if lepton_selection == "Tight":
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions]['outputFile'])        
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
        self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                        "hadd_stage2_%s.root" % lepton_selection_and_frWeight)

    if self.do_sync:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_syncNtuple(lines_makefile)
      outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
      self.outputFile_sync['sync'] = outputFile_sync_path
      self.targets.append(outputFile_sync_path)
      self.addToMakefile_hadd_sync(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"))
    key_addFakes_job = getKey("fakes_data")
    category_sideband = "ttZctrl_Fakeable_wFakeRateWeights"
    self.jobOptions_addFakes[key_addFakes_job] = {
      'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
      'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgroundLeptonFakes_cfg.py"),
      'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgroundLeptonFakes.root"),
      'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgroundLeptonFakes.log"),
      'category_signal' : "ttZctrl_Tight",
      'category_sideband' : category_sideband
    }
    self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
      key_prep_dcard_dir = getKey("prepareDatacards")
      prep_dcard_job_tuple = (self.channel, histogramToFit)
      key_prep_dcard_job = getKey(histogramToFit)      
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
        'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple),
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      key_prep_dcard_job = getKey(histogramToFit)
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
      key_add_syst_fakerate_dir = getKey("addSystFakeRates")                                    
      add_syst_fakerate_job_tuple = (self.channel, histogramToFit) 
      key_add_syst_fakerate_job = getKey(histogramToFit)      
      self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
        'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
        'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
        'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s.root" % add_syst_fakerate_job_tuple),
        'category' : self.channel,
        'histogramToFit' : histogramToFit,
        'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png")
      }
      histogramDir_nominal = self.histogramDir_prep_dcard
      for lepton_type in [ 'e', 'm' ]:
        lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
        if lepton_mcClosure not in self.lepton_selections:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
        key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight)
        histogramDir_mcClosure = self.mcClosure_dir[lepton_mcClosure]
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
          'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
          'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
          'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
          'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
        })
      self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
    key_makePlots_dir = getKey("makePlots")                                       
    key_makePlots_job = getKey('')
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable' : self.executable_make_plots,
      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
      'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
      'histogramDir' : self.histogramDir_prep_dcard,
      'label' : "t#bar{t}Z control region",
      'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
예제 #9
0
    def create(self):
        """Creates all necessary config files and runs the Ntuple production -- either locally or on the batch system
        """

        # Materialize the directory structure before any config file is written.
        for key, entry in self.dirs.items():
            if isinstance(entry, dict):
                for path in entry.values():
                    create_if_not_exists(path)
            else:
                create_if_not_exists(entry)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue

            process_name = sample_info["process_name_specific"]
            is_mc = (sample_info["type"] == "mc")

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_prodNtuple, process_name))

            inputFileList = generateInputFileList(sample_name, sample_info,
                                                  self.max_files_per_job,
                                                  self.debug)
            key_dir = getKey(sample_name)
            # Output trees are bucketed into per-thousand subdirectories
            # (0000, 0001, ...); create each bucket once up front.
            buckets = set(jobId // 1000 for jobId in inputFileList.keys())
            for bucket in buckets:
                create_if_not_exists(
                    os.path.join(self.dirs[key_dir][DKEY_NTUPLES],
                                 '%04d' % bucket))
            for jobId, ntupleFiles in inputFileList.items():

                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = ntupleFiles
                if not ntupleFiles:
                    # Jobs without any input files are skipped with a warning.
                    print("Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                        key_file, self.inputFiles[key_file]))
                    continue
                self.cfgFiles_prodNtuple_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS],
                    "produceNtuple_%s_%i_cfg.py" % (process_name, jobId))
                self.outputFiles[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_NTUPLES], "%04d" % (jobId // 1000),
                    "tree_%i.root" % jobId)
                self.logFiles_prodNtuple[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_LOGS],
                    "produceNtuple_%s_%i.log" % (process_name, jobId))
                jobOptions = {
                    'inputFiles': ntupleFiles,
                    'cfgFile_modified':
                    self.cfgFiles_prodNtuple_modified[key_file],
                    'outputFile': self.outputFiles[key_file],
                    # NOTE(review): HIP mitigation is hard-coded on here (the
                    # per-sample flag was disabled upstream) -- confirm intent.
                    'use_HIP_mitigation_mediumMuonId': True,
                    'is_mc': is_mc,
                    'random_seed': jobId
                }
                self.createCfg_prodNtuple(jobOptions)

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_prodNtuple)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_prodNtuple(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
예제 #10
0
    def __init__(
        self,
        configDir,
        outputDir,
        output_file,
        executable,
        samples,
        max_files_per_job,
        era,
        check_output_files,
        running_method,
        num_parallel_jobs,
        pool_id='',
        verbose=False,
        dry_run=False,
        use_home=True,
    ):
        """Set up all bookkeeping (paths, job dictionaries, directory layout)
        needed to produce the pileup profiles; creates configDir/outputDir on disk.
        """
        self.configDir = configDir
        self.outputDir = outputDir
        self.executable = executable
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.check_output_files = check_output_files
        self.verbose = verbose
        self.dry_run = dry_run
        self.use_home = use_home
        if running_method.lower() not in ("sbatch", "makefile"):
            raise ValueError("Invalid running method: %s" % running_method)

        self.running_method = running_method
        self.is_sbatch = (self.running_method.lower() == "sbatch")
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(self.configDir, "Makefile_puProfile")
        self.num_parallel_jobs = num_parallel_jobs
        # Fall back to a random pool id when the caller did not supply one.
        self.pool_id = pool_id or uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: %s" % self.workingDir)
        self.template_dir = os.path.join(
            os.getenv('CMSSW_BASE'), 'src', 'tthAnalysis', 'HiggsToTauTau',
            'test', 'templates')
        logging.info("Templates directory is: %s" % self.template_dir)

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.outputDir)
        self.output_file = os.path.join(self.outputDir, output_file)
        # Version-stamped log destinations; get_log_version() returns fresh
        # names if files from a previous run already exist.
        self.stdout_file_path, self.stderr_file_path, \
        self.sw_ver_file_cfg, self.sw_ver_file_out = get_log_version((
            os.path.join(self.configDir, "stdout_puProfile.log"),
            os.path.join(self.configDir, "stderr_puProfile.log"),
            os.path.join(self.configDir, "VERSION_puProfile.log"),
            os.path.join(self.outputDir, "VERSION_puProfile.log"),
        ))

        self.sbatchFile_puProfile = os.path.join(self.configDir,
                                                 "sbatch_puProfile.py")
        # Per-job bookkeeping dictionaries, keyed by process/job keys.
        self.cfgFiles_puProfile = {}
        self.logFiles_puProfile = {}
        self.scriptFiles_puProfile = {}
        self.jobOptions_sbatch = {}

        self.inputFiles = {}
        self.outputFiles_tmp = {}
        self.outputFiles = {}

        self.phoniesToAdd = []
        self.filesToClean = []
        self.targets = []

        # Directory layout: per-sample subdirectories under either configDir
        # (config-like dir types) or outputDir (histogram-like dir types).
        self.dirs = {}
        all_dirs = [
            DKEY_CFGS, DKEY_HISTO_TMP, DKEY_HISTO, DKEY_PLOTS, DKEY_LOGS,
            DKEY_SCRIPTS, DKEY_HADD_RT
        ]
        cfg_dirs = [
            DKEY_CFGS, DKEY_LOGS, DKEY_PLOTS, DKEY_SCRIPTS, DKEY_HADD_RT
        ]

        for sample_name, sample_info in self.samples.items():
            if not sample_info['use_it']:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in all_dirs:
                if dir_type == DKEY_PLOTS:
                    # Plots are produced once globally, not per sample.
                    continue
                initDict(self.dirs, [key_dir, dir_type])
                base_dir = self.configDir if dir_type in cfg_dirs else self.outputDir
                self.dirs[key_dir][dir_type] = os.path.join(
                    base_dir, dir_type, process_name)
        for dir_type in cfg_dirs:
            initDict(self.dirs, [dir_type])
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type)

        self.cvmfs_error_log = {}
        # Job counters per workflow stage.
        self.num_jobs = {
            'hadd': 0,
            'puProfile': 0,
            'plot': 0,
        }
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")
      process_name = sample_info["process_name_specific"]

      logging.info("Building dictionaries for sample %s..." % process_name)
      for chargeSumSelection in self.chargeSumSelections:
        for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
          for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
            if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
              continue
            if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
              continue

            lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
            central_or_shift_extensions = ["", "hadd", "copyHistograms", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if process_name_or_dummy in [ "hadd" ] and central_or_shift_or_dummy != "":
                  continue
                evtcategories_extended = [""]
                evtcategories_extended.extend(self.evtCategories)
                if central_or_shift_or_dummy in [ "hadd", "copyHistograms", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info
                ):
                  continue

                key_dir = getKey(process_name_or_dummy, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_RLES, DKEY_SYNC ]:
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                                                                "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                                                                "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                    
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for chargeSumSelection in self.chargeSumSelections:
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        lepton_selection = lepton_and_hadTau_selection
        ##hadTau_selection = None
        ##if lepton_and_hadTau_selection == "Tight":
        ##  hadTau_selection = "%s|%s" % (lepton_and_hadTau_selection, self.hadTau_mva_wp)
        ##else:
        ##  hadTau_selection = lepton_and_hadTau_selection
        hadTau_selection = "%s|%s" % (lepton_and_hadTau_selection, self.hadTau_mva_wp)
        electron_selection = lepton_selection
        muon_selection = lepton_selection
        
        if lepton_and_hadTau_selection == "Fakeable_mcClosure_e":
          electron_selection = "Fakeable"
          muon_selection = "Tight"
          hadTau_selection = "Tight|%s" % self.hadTau_mva_wp
        elif lepton_and_hadTau_selection == "Fakeable_mcClosure_m":
          electron_selection = "Tight"
          muon_selection = "Fakeable"
          hadTau_selection = "Tight|%s" % self.hadTau_mva_wp
        elif lepton_and_hadTau_selection == "Fakeable_mcClosure_t":
          electron_selection = "Tight"
          muon_selection = "Tight"
          hadTau_selection = "Fakeable"
          
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_and_hadTau_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift)

              for jobId in inputFileList.keys():
                
                analyze_job_tuple = (process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                applyFakeRateWeights = self.applyFakeRateWeights  \
                  if self.isBDTtraining or not lepton_and_hadTau_selection == "Tight" \
                  else "disabled"

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'hadTauSelection'          : hadTau_selection,
                  'apply_hadTauGenMatching'  : self.apply_hadTauGenMatching,
                  'chargeSumSelection'       : chargeSumSelection,
                  'applyFakeRateWeights'     : applyFakeRateWeights,
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'selectBDT'                : self.isBDTtraining,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'useNonNominal'            : self.use_nonnominal,
                  'fillGenEvtHistograms'     : True,
                  'useObjectMultiplicity'    : True,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_and_hadTau_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, "hadd")
                hadd_stage1_job_tuple = (process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], 
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

                if self.isBDTtraining:
                  self.targets.append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

            if self.isBDTtraining:
              continue
            
            #----------------------------------------------------------------------------
            # split hadd_stage1 files into separate files, one for each event category
            for category in self.evtCategories:
              key_hadd_stage1_job = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_copyHistograms_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, "copyHistograms")
              copyHistograms_job_tuple = (category, process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_copyHistograms_job = getKey(*copyHistograms_job_tuple)
              cfgFile_modified = os.path.join(self.dirs[key_copyHistograms_dir][DKEY_CFGS], "copyHistograms_%s_%s_%s_%s_cfg.py" % copyHistograms_job_tuple)
              outputFile = os.path.join(self.dirs[key_copyHistograms_dir][DKEY_HIST], "copyHistograms_%s_%s_%s_%s.root" % copyHistograms_job_tuple)
              self.jobOptions_copyHistograms[key_copyHistograms_job] = {
                'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job],
                'cfgFile_modified' : cfgFile_modified,
                'outputFile' : outputFile,
                'logFile' : os.path.join(self.dirs[key_copyHistograms_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
                'categories' : [ category ],
              }
              self.createCfg_copyHistograms(self.jobOptions_copyHistograms[key_copyHistograms_job])
            #----------------------------------------------------------------------------

            # add output files of copyHistograms jobs to list of input files for hadd_stage1_5
            for category in self.evtCategories:
              key_copyHistograms_job = getKey(category, process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_hadd_stage1_5_dir = getKey("hadd", chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              hadd_stage1_5_job_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
              if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_copyHistograms[key_copyHistograms_job]['outputFile'])
              self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                          "hadd_stage1_5_%s_%s_%s.root" % hadd_stage1_5_job_tuple)

          if self.isBDTtraining:
            continue

          for category in self.evtCategories:
            # sum fake background contributions for the total of all MC sample
            # input processes: TT_fake, TTW_fake, TTWW_fake, ...
            # output process: fakes_mc
            key_hadd_stage1_5_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            key_addBackgrounds_dir = getKey("addBackgrounds")
            addBackgrounds_job_fakes_tuple = ("fakes_mc", category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
            sample_categories = []
            sample_categories.extend(self.nonfake_backgrounds)
            processes_input = []
            for sample_category in sample_categories:
              processes_input.append("%s_fake" % sample_category)
            self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
              'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
              'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
              'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
              'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
              'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
              'processes_input' : processes_input,
              'process_output' : "fakes_mc"
              }
            self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])
            
            # sum conversion background contributions for the total of all MC sample
            # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
            # output process: Convs
            addBackgrounds_job_Convs_tuple = ("Convs", category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection)
            key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
            sample_categories = []
            sample_categories.extend(self.nonfake_backgrounds)
            processes_input = []
            for sample_category in sample_categories:
              processes_input.append("%s_Convs" % sample_category)
            self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
              'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
              'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
              'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
              'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
              'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
              'processes_input' : processes_input,
              'process_output' : "Convs"
              }
            self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

            # sum signal contributions from gluon fusion and VBF HH production,
            # separately for "nonfake" and "fake" contributions
            genMatch_categories = [ "nonfake", "fake" ]
            for genMatch_category in genMatch_categories:
              for signal_base, signal_input in self.signal_io.items():
                addBackgrounds_job_signal_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection, signal_base, genMatch_category)
                key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple)
                if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys():
                  continue
                processes_input = signal_input
                process_output = signal_base
                if genMatch_category == "fake":
                  processes_input = [ process_input + "_fake" for process_input in processes_input ]
                  process_output += "_fake"
                  self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = {
                    'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                    'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple),
                    'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple),
                    'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple),
                    'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
                    'processes_input' : processes_input,
                    'process_output' : process_output
                    }
                  self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal])
                  key_hadd_stage2_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
                  if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
                  if lepton_selection == "Tight":
                    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile'])

              # initialize input and output file names for hadd_stage2
              key_hadd_stage1_5_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_hadd_stage2_dir = getKey("hadd", chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              hadd_stage2_job_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
              if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
              if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])
              self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
              self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                              "hadd_stage2_%s_%s_%s.root" % hadd_stage2_job_tuple)

    if self.isBDTtraining:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_analyze(lines_makefile)
      self.addToMakefile_hadd_stage1(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for chargeSumSelection in self.chargeSumSelections:
      for category in self.evtCategories:
        key_hadd_stage1_5_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled"))
        key_addFakes_dir = getKey("addBackgroundLeptonFakes")
        addFakes_job_tuple = (category, chargeSumSelection)
        key_addFakes_job = getKey("data_fakes", *addFakes_job_tuple)
        self.jobOptions_addFakes[key_addFakes_job] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % addFakes_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % addFakes_job_tuple),
          'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % addFakes_job_tuple),
          'category_signal' : getHistogramDir(category, "Tight", "disabled", chargeSumSelection),
          'category_sideband' : getHistogramDir(category, "Fakeable", "enabled", chargeSumSelection)
        }
        self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
        key_hadd_stage2_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for chargeSumSelection in self.chargeSumSelections:
      for category in self.evtCategories:
        for histogramToFit in self.histograms_to_fit:
          key_hadd_stage2_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
          key_prep_dcard_dir = getKey("prepareDatacards")
          prep_dcard_job_tuple = (self.channel, category, chargeSumSelection, histogramToFit)
          key_prep_dcard_job = getKey(category, chargeSumSelection, histogramToFit)
          self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
            'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple),
            'histogramDir' : getHistogramDir(category, "Tight", "disabled", chargeSumSelection),
            'histogramToFit' : histogramToFit
          }
          self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
          # add shape templates for the following systematic uncertainties:
          #  - 'CMS_ttHl_Clos_norm_e'
          #  - 'CMS_ttHl_Clos_shape_e'
          #  - 'CMS_ttHl_Clos_norm_m'
          #  - 'CMS_ttHl_Clos_shape_m'
          key_add_syst_fakerate_dir = getKey("addSystFakeRates")
          add_syst_fakerate_job_tuple = (self.channel, category, chargeSumSelection, histogramToFit)
          key_add_syst_fakerate_job = getKey(category, chargeSumSelection, histogramToFit)
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
            'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
            'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
            'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
            'category' : category,
            'histogramToFit' : histogramToFit,
            'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
          }
          histogramDir_nominal = getHistogramDir(category, "Tight", "disabled", chargeSumSelection)
          for lepton_and_hadTau_type in [ 'e', 'm', 't' ]:
            lepton_and_hadTau_mcClosure = "Fakeable_mcClosure_%s" % lepton_and_hadTau_type
            if lepton_and_hadTau_mcClosure not in self.lepton_and_hadTau_selections:
              continue
            lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_mcClosure, "enabled")
            key_addBackgrounds_job_fakes = getKey("fakes_mc", category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            histogramDir_mcClosure = self.mcClosure_dir[lepton_and_hadTau_mcClosure+'_%s' %chargeSumSelection]
            histogramDir_mcClosure = histogramDir_mcClosure.replace(self.evtCategory_inclusive, category)
            self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
              'add_Clos_%s' % lepton_and_hadTau_type : ("Fakeable_mcClosure_%s" % lepton_and_hadTau_type) in self.lepton_and_hadTau_selections,
              'inputFile_nominal_%s' % lepton_and_hadTau_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
              'histogramName_nominal_%s' % lepton_and_hadTau_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
              'inputFile_mcClosure_%s' % lepton_and_hadTau_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
              'histogramName_mcClosure_%s' % lepton_and_hadTau_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
            })
          self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    for chargeSumSelection in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(self.evtCategory_inclusive, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
      key_makePlots_dir = getKey("makePlots")
      key_makePlots_job = getKey(chargeSumSelection)
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_cfg.py" % (self.channel, chargeSumSelection)),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s.png" % (self.channel, chargeSumSelection)),
        'histogramDir' : getHistogramDir(self.evtCategory_inclusive, "Tight", "disabled", chargeSumSelection),
        'label' : '1l1tau',
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
      if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections: #TODO
        key_makePlots_job = getKey(chargeSumSelection)
        key_hadd_stage2 = getKey(chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
        self.jobOptions_make_plots[key_makePlots_job] = {
          'executable' : self.executable_make_plots_mcClosure,
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
          'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_%s_cfg.py" % (self.channel, chargeSumSelection)),
          'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s_%s.png" % (self.channel, chargeSumSelection))
        }
        self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])
        
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_copyHistograms)
      self.sbatchFile_copyHistograms = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_copyHistograms_%s.py" % self.channel)
      self.createScript_sbatch_copyHistograms(self.executable_copyHistograms, self.sbatchFile_copyHistograms, self.jobOptions_copyHistograms)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch_addBackgrounds(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch_addBackgrounds(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch_addFakes(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_copyHistograms(lines_makefile, make_target = "phony_copyHistograms", make_dependency = "phony_hadd_stage1")
    self.addToMakefile_backgrounds_from_data(lines_makefile, make_dependency = "phony_copyHistograms")
    #----------------------------------------------------------------------------                             
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
예제 #12
0
    def create(self):
        """Creates all necessary config files and runs the PU profile production -- either locally or on the batch system

        Returns the (updated) ``self.num_jobs`` bookkeeping dictionary.
        """
        # NOTE(review): the docstring says 'PU profile production' while every
        # generated file below is named 'refGenWeight_*' -- confirm which is accurate.

        # Ensure the whole output directory tree exists; self.dirs maps a key
        # either directly to a path, or to a {dir_type: path} sub-dictionary.
        for key in self.dirs.keys():
            if isinstance(self.dirs[key], dict):
                for dir_path in self.dirs[key].values():
                    create_if_not_exists(dir_path)
            else:
                create_if_not_exists(self.dirs[key])

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            # Only enabled MC samples are processed.
            if not sample_info['use_it']:
                continue

            process_name = sample_info["process_name_specific"]
            is_mc = (sample_info["type"] == "mc")

            if not is_mc:
                continue

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable, process_name))

            # max_files_per_job = 1, then flatten the per-job file lists into a
            # single list of all input files for this sample.
            inputFileList_map = generateInputFileList(sample_info, 1)
            key_dir = getKey(process_name)
            key_file = getKey(process_name)

            self.inputFiles[key_file] = list(
                itertools.chain(*inputFileList_map.values()))
            if len(self.inputFiles[key_file]) == 0:
                logging.warning("'%s' = %s --> skipping job !!" %
                                (key_file, self.inputFiles[key_file]))
                continue

            outputFile = os.path.join(self.dirs[key_dir][DKEY_RESULTS],
                                      "%s.txt" % process_name)
            self.outputFiles[key_file] = outputFile
            # Skip samples whose result file was already produced by an
            # earlier run, so that re-running the task is incremental.
            if os.path.isfile(outputFile):
                logging.info('File {} already exists --> skipping job'.format(
                    outputFile))
                continue

            # Per-sample config, log, wrapper-script and plot file locations.
            self.cfgFiles[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS],
                "refGenWeight_%s_cfg.txt" % (process_name))
            self.logFiles[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_LOGS],
                "refGenWeight_%s.log" % (process_name))
            self.scriptFiles[key_file] = os.path.join(
                self.dirs[key_dir][DKEY_CFGS],
                "refGenWeight_%s_cfg.sh" % (process_name))
            # Plots are produced in both pdf and png; the paths are joined into
            # one space-separated string as expected by the '-p' option below.
            self.plotFiles[key_file] = ' '.join([
                os.path.join(self.dirs[key_dir][DKEY_PLOTS],
                             "refGenWeight_%s.%s" % (process_name, extension))
                for extension in ['pdf', 'png']
            ])

            # Assemble the per-sample job description consumed by createCfg()
            # and (optionally) by the sbatch submission script.
            self.jobOptions_sbatch[key_file] = {
                'inputFiles':
                self.inputFiles[key_file],
                'cfgFile_path':
                self.cfgFiles[key_file],
                'cmdParams':
                "-i {} -o {} -p {} -v".format(
                    self.cfgFiles[key_file],
                    self.outputFiles[key_file],
                    self.plotFiles[key_file],
                ),
                'outputFile':
                self.outputFiles[key_file],
                'logFile':
                self.logFiles[key_file],
                'scriptFile':
                self.scriptFiles[key_file],
            }
            self.createCfg(self.jobOptions_sbatch[key_file])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable)
            self.num_jobs['refGenWeight'] += self.createScript_sbatch(
                self.executable, self.sbatchFile, self.jobOptions_sbatch)

        # Tie everything together in a Makefile so the jobs can also be run
        # locally via 'make'.
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile(lines_makefile)
        self.addToMakefile_final(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done")

        return self.num_jobs
예제 #13
0
    def create(self):
        """Creates all necessary config files and runs the MEM -- either locally or on the batch system.

        Splits each sample's input files into event ranges such that every job
        performs up to ``mem_integrations_per_job`` MEM integrations, writes one
        addMEM config per job, books the hadd targets, and prints a per-sample
        summary of the booked integrations.

        Returns:
            True if the total number of integrations is within
            ``self.max_mem_integrations`` (jobs may be started), False otherwise.
        """
        # Per-sample integration statistics, keyed by process name.
        statistics = {}

        # Create the directory structure; self.dirs entries are either plain
        # paths or dicts of paths keyed by directory type.
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                continue

            # Skip samples whose Ntuples are not (yet) available on disk.
            if not os.path.exists(sample_info['local_paths'][0]['path']):
                logging.warning("Skipping sample {sample_name}".format(sample_name = sample_name))
                continue

            process_name = sample_info["process_name_specific"]

            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_addMEM, process_name))

            inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
            # typically, the analysis ends here and starts looping b/c the smallest unit of work processes at least one file
            # we need, however, to split the file into event ranges in such a way that each job performs
            # mem_integrations_per_job MEM integrations

            # so what we are going to do is to open each set of files in inputFileList, read the variable
            # requestMEM_2lss_1tau and try to gather the event ranges such that each event range
            # performs up to mem_integrations_per_job integrations per job
            memEvtRangeDict = self.memJobList(inputFileList)

            for jobId in memEvtRangeDict.keys():

                key_dir = getKey(sample_name)
                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = memEvtRangeDict[jobId]['input_fileset']

                # there should always be a job
                # (bug fix: the original compared the file list itself to 0,
                #  which is always True in Python 2 -- check its length instead)
                assert(len(self.inputFiles[key_file]) > 0), "No input files for job %s !!" % key_file

                #TODO: is this assertion really needed? in principle, no ...
                assert(len(self.inputFiles[key_file]) == 1), "There is more than one input file!"
                self.cfgFiles_addMEM_modified[key_file] = os.path.join(self.dirs[key_dir][DKEY_CFGS], "addMEM_%s_%s_%i_cfg.py" % \
                                                                       (self.channel, process_name, jobId))
                self.outputFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_NTUPLES], "%s_%i.root" % \
                  (process_name, jobId))
                self.logFiles_addMEM[key_file] = os.path.join(self.dirs[key_dir][DKEY_LOGS], "addMEM_%s_%s_%i.log" % \
                                                              (self.channel, process_name, jobId))
                # Write the per-job addMEM config covering this job's event range.
                self.createCfg_addMEM(
                    self.inputFiles[key_file],
                    memEvtRangeDict[jobId]['event_range'][0],
                    memEvtRangeDict[jobId]['event_range'][1],
                    self.outputFiles[key_file],
                    self.era,
                    self.cfgFiles_addMEM_modified[key_file],
                )

                # associate the output file with the fileset_id
                fileset_id = memEvtRangeDict[jobId]['fileset_id']
                hadd_output = os.path.join(
                    self.dirs[key_dir][DKEY_FINAL_NTUPLES], '%s_%i.root' % ('tree', fileset_id)
                )
                if hadd_output not in self.hadd_records:
                    self.hadd_records[hadd_output] = {}
                    self.hadd_records[hadd_output]['output_files'] = []
                self.hadd_records[hadd_output]['fileset_id'] = fileset_id
                self.hadd_records[hadd_output]['output_files'].append(self.outputFiles[key_file])
                #self.filesToClean.append(self.outputFiles[key_file])

            # let's sum the number of integration per sample
            # count each fileset's entries only once, even when the fileset was
            # split across several jobs
            nofEntriesMap = {}
            for v in memEvtRangeDict.values():
                if v['fileset_id'] not in nofEntriesMap:
                    nofEntriesMap[v['fileset_id']] = v['nof_entries']
            statistics[process_name] = {
                'nof_int'         : sum([entry['nof_int'] for entry in memEvtRangeDict.values()]),
                'nof_entries'     : sum(nofEntriesMap.values()),
                'nof_jobs'        : len(memEvtRangeDict),
                'nof_events_pass' : sum([entry['nof_events_pass'] for entry in memEvtRangeDict.values()]),
                'nof_int_pass'    : sum([entry['nof_int_pass'] for entry in memEvtRangeDict.values()]),
            }

        if self.is_sbatch:
            logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addMEM)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_addMEM(lines_makefile)
        self.addToMakefile_hadd(lines_makefile)
        self.createMakefile(lines_makefile)

        # Print a per-sample and a total summary of the booked integrations.
        # NOTE(review): the averages below divide by the total entry / passing
        # event counts -- presumably always non-zero for accepted samples;
        # a zero count would raise ZeroDivisionError. Confirm against memJobList.
        ws_len = max([len(kk) + 1 for kk in statistics.keys()])
        total_nof_integrations_sum = sum(x['nof_int'] for x in statistics.values())
        total_nof_entires          = sum(x['nof_entries'] for x in statistics.values())
        total_nof_integrations_avg = float(total_nof_integrations_sum) / total_nof_entires
        total_nof_jobs             = sum(x['nof_jobs'] for x in statistics.values())
        total_nof_pass             = sum(x['nof_events_pass'] for x in statistics.values())
        total_nof_int_pass_avg     = float(sum(x['nof_int_pass'] for x in statistics.values())) / total_nof_pass
        for k, v in statistics.iteritems():
            print('%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d (%.2f%%) evt pass; %.2f int/evt pass)' %
                  (k, ' ' * (ws_len - len(k)), v['nof_int'], v['nof_entries'], v['nof_jobs'],
                   float(v['nof_int']) / v['nof_entries'], v['nof_events_pass'],
                   (100 * float(v['nof_events_pass']) / v['nof_entries']), float(v['nof_int_pass']) / v['nof_events_pass']))
        print('%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d evt pass; %.2f int/evt pass)' %
              ('total', ' ' * (ws_len - len('total')), total_nof_integrations_sum, total_nof_entires, total_nof_jobs,
               total_nof_integrations_avg, total_nof_pass, total_nof_int_pass_avg))

        # Refuse to submit when the booked workload exceeds the configured cap.
        if total_nof_integrations_sum > self.max_mem_integrations:
            logging.error("Will not start the jobs (max nof integrations exceeded)!")
            return False
        else:
            logging.info("Done")
            return True
예제 #14
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system.

        Builds the directory structure, writes one analysis config per
        (sample, jobId), books the hadd_stage1 inputs/outputs, optionally
        creates the sbatch submission script, and writes the Makefile.

        Returns:
            self.num_jobs -- the job counter maintained by this instance.
        """

        # Register per-sample directories: configs/logs under configDir,
        # histograms/RLE files under outputDir.
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        # Register channel-level (sample-independent) directories.
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)

        # Create all registered directories, logging percentage progress.
        numDirectories = 0
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                numDirectories += len(self.dirs[key])
            else:
                numDirectories += 1
        logging.info("Creating directory structure (numDirectories = %i)" %
                     numDirectories)
        numDirectories_created = 0
        frac = 1
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
                numDirectories_created += len(self.dirs[key])
            else:
                create_if_not_exists(self.dirs[key])
                numDirectories_created = numDirectories_created + 1
            while 100 * numDirectories_created >= frac * numDirectories:
                logging.info(" %i%% completed" % frac)
                frac = frac + 1
        logging.info("Done.")

        # Build the per-sample lists of input Ntuple files, split into jobs.
        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            inputFileList = inputFileLists[sample_name]
            for jobId in inputFileList.keys():
                ##print "processing sample %s: jobId = %i" % (process_name, jobId)

                # build config files for executing analysis code
                key_analyze_dir = getKey(process_name)
                analyze_job_tuple = (process_name, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                    logging.warning(
                        "No input ntuples for %s --> skipping job !!" %
                        (key_analyze_job))
                    continue

                cfgFile_modified_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_CFGS],
                    "analyze_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_LOGS],
                    "analyze_%s_%i.log" % analyze_job_tuple)
                histogramFile_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_HIST],
                    "analyze_%s_%i.root" % analyze_job_tuple)

                self.jobOptions_analyze[key_analyze_job] = {
                    'ntupleFiles': ntupleFiles,
                    'cfgFile_modified': cfgFile_modified_path,
                    'histogramFile': histogramFile_path,
                    'histogramDir': 'analyze_hadTopTagger',
                    'logFile': logFile_path,
                    'hadTauSelection': self.hadTau_selection,
                    'lumiScale': 1.,
                    'selectBDT': True,
                }
                self.createCfg_analyze(
                    self.jobOptions_analyze[key_analyze_job], sample_info)

                # initialize input and output file names for hadd_stage1
                # (bug fix: the original keyed the directory lookup on the
                #  undefined name 'lepton_selection_and_frWeight', which would
                #  raise a NameError; the dirs registered above are keyed on
                #  the process name alone)
                key_hadd_stage1_dir = getKey(process_name)
                key_hadd_stage1_job = getKey(process_name)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                    self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(
                    self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[
                    key_hadd_stage1_job] = os.path.join(
                        self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                        "hadd_stage1_%s.root" % process_name)
                self.targets.append(
                    self.outputFile_hadd_stage1[key_hadd_stage1_job])

        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done.")

        # NOTE(review): self.num_jobs is never incremented in this method --
        # presumably maintained by createScript_sbatch_analyze; verify.
        return self.num_jobs
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")
      process_name = sample_info["process_name_specific"]

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
            continue

          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          for leptonChargeSelection in self.leptonChargeSelections:
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_selection, leptonChargeSelection, sample_info
                ):
                  continue
                
                key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                  if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "addBackgroundLeptonFlips", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)                
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.") 

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for lepton_selection in self.lepton_selections:
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      hadTauVeto_selection = "Tight"
      hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])

      if lepton_selection == "forBDTtraining":
        electron_selection = "Loose"
        muon_selection = "Loose"
      elif lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for leptonChargeSelection in self.leptonChargeSelections:

          if 'mcClosure' in lepton_selection and leptonChargeSelection != 'SS':
            continue

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_selection, leptonChargeSelection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_selection, leptonChargeSelection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift)
              
              for jobId in inputFileList.keys():
                analyze_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                applyFakeRateWeights = self.applyFakeRateWeights \
                  if lepton_selection.find("Tight") == -1 \
                  else "disabled"

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'hadTauSelection'          : hadTauVeto_selection,
                  'leptonChargeSelection'    : leptonChargeSelection,
                  'applyFakeRateWeights'     : applyFakeRateWeights,
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'selectBDT'                : self.isBDTtraining,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'useNonNominal'            : self.use_nonnominal,
                  'fillGenEvtHistograms'     : True,
                  'gen_mHH'                  : self.gen_mHH,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.isBDTtraining or self.do_sync:
              continue

            # add output files of hadd_stage1 to list of input files for hadd_stage1_5
            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
            key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
            hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
            key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

          if self.isBDTtraining or self.do_sync:
            continue

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_fake, TTW_fake, TTWW_fake, ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          sample_categories = self.get_sample_categories()
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_flip, TTW_flip, TTWW_flip, ...
          # output process: flips_mc
          addBackgrounds_job_flips_tuple = ("flips_mc", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_flips = getKey(*addBackgrounds_job_flips_tuple)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_flip" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_flips_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_flips_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_flips_tuple),
            'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ],
            'processes_input' : processes_input,
            'process_output' : "flips_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips])

          # sum conversion background contributions for the total of all MC sample
          # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          processes_input = []
          for sample_category in self.convs_backgrounds:
            processes_input.append("%s_Convs" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          # sum signal contributions from HH->4tau ("tttt"), HH->2W2tau ("wwtt"), and HH->4W ("wwww"),
          # separately for "nonfake" and "fake" contributions
          genMatch_categories = [ "nonfake", "fake" ]
          for genMatch_category in genMatch_categories:
            for signal_base, signal_input in self.signal_io.items():
              addBackgrounds_job_signal_tuple = (lepton_selection_and_frWeight, leptonChargeSelection, signal_base, genMatch_category)
              key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple)
              if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys():
                continue
              processes_input = signal_input
              process_output = signal_base
              if genMatch_category == "fake":
                processes_input = [ process_input + "_fake" for process_input in processes_input ]
                process_output += "_fake"
              self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = {
                'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple),
                'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple),
                'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple),
                'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ],
                'processes_input' : processes_input,
                'process_output' : process_output
              }
              self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal])
              key_hadd_stage2_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
              if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
              if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile'])

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
          key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
          hadd_stage2_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])          
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    if self.isBDTtraining or self.do_sync:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        if self.isBDTtraining:
          self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        elif self.do_sync:
          self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      if self.isBDTtraining:
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
      elif self.do_sync:
        self.addToMakefile_syncNtuple(lines_makefile)
        outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
        self.outputFile_sync['sync'] = outputFile_sync_path
        self.targets.append(outputFile_sync_path)
        self.addToMakefile_hadd_sync(lines_makefile)
      else:
        raise ValueError("Internal logic error")
      self.targets.extend(self.phoniesToAdd)
      self.addToMakefile_validate(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for category in self.categories:
      for leptonChargeSelection in self.leptonChargeSelections:
        key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), leptonChargeSelection)
        key_addFakes_dir = getKey("addBackgroundLeptonFakes")
        addFakes_job_tuple = (category, leptonChargeSelection)
        key_addFakes_job = getKey("data_fakes", *addFakes_job_tuple)        
        self.jobOptions_addFakes[key_addFakes_job] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % addFakes_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % addFakes_job_tuple),
          'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % addFakes_job_tuple),
          'category_signal' : getHistogramDir(category, "Tight", "disabled", leptonChargeSelection),
          'category_sideband' : getHistogramDir(category, "Fakeable", "enabled", leptonChargeSelection)
          }
        self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection)
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    #--------------------------------------------------------------------------
    # add histograms in OS and SS regions,
    # so that "data_fakes" background can be subtracted from OS control region used to estimate charge flip background
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    key_hadd_stage1_6_dir = getKey("hadd", get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    if key_hadd_stage1_6_job not in self.inputFiles_hadd_stage1_6:
      self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job] = []
    for category in self.categories:
      key_addFakes_job = getKey("data_fakes", category, leptonChargeSelection)
      self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])
    self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
    self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job] = os.path.join(self.dirs[key_hadd_stage1_6_dir][DKEY_HIST],
                                                                        "hadd_stage1_6_Tight_OS.root")
    #--------------------------------------------------------------------------

    logging.info("Creating configuration files to run 'addBackgroundFlips'")
    for category in self.categories:
      key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_addFlips_dir = getKey("addBackgroundLeptonFlips")
      key_addFlips_job = getKey("data_flips", category)
      self.jobOptions_addFlips[key_addFlips_job] = {
        'inputFile' : self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFlips_dir][DKEY_CFGS], "addBackgroundLeptonFlips_%s_cfg.py" % category),
        'outputFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_HIST], "addBackgroundLeptonFlips_%s.root" % category),
        'logFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_LOGS], "addBackgroundLeptonFlips_%s.log" % category),
        'category_signal' : getHistogramDir(category, "Tight", "disabled", "SS" ),
        'category_sideband' : getHistogramDir(category, "Tight", "disabled", "OS" )
        }
      self.createCfg_addFlips(self.jobOptions_addFlips[key_addFlips_job])
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFlips[key_addFlips_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'...")
    for category in self.categories:
      for histogramToFit in self.histograms_to_fit:
        logging.info(" ...  for category %s, histogram %s" % (category, histogramToFit))
        prep_dcard_HH = set()
        for sample_name, sample_info in self.samples.items():
          if not sample_info["use_it"]:
            continue
          sample_category = sample_info["sample_category"]
          masses_to_exclude = ["3000", "2500", "2000", "1750", "1500", "1250"]
          if sample_category.startswith("signal"):
            sample_category = sample_info["sample_category_hh"]
            doAdd = False
            if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit:
              if ("SM" in histogramToFit or any(nonresPoint in histogramToFit for nonresPoint in NONRESONANT_KEYS)) and 'nonresonant' in sample_category:
                doAdd = True
              if "spin0" in histogramToFit and "spin0" in sample_category and histogramToFit[9:13] in sample_category:
                doAdd = True
              if "spin2" in histogramToFit and "spin2" in sample_category and histogramToFit[9:13] in sample_category:
                doAdd = True
              for mass in masses_to_exclude:
                if mass in sample_category: doAdd = False
            else:
              doAdd = True
            if doAdd:
              if "wwww" in sample_category:
                prep_dcard_HH.add(sample_category.replace("wwww", "zzzz"))
                prep_dcard_HH.add(sample_category.replace("wwww", "wwww"))
                prep_dcard_HH.add(sample_category.replace("wwww", "zzww"))
              elif "wwtt" in sample_category:
                prep_dcard_HH.add(sample_category.replace("wwtt", "ttzz"))
                prep_dcard_HH.add(sample_category.replace("wwtt", "ttww"))
              elif "tttt" in sample_category:                  
                prep_dcard_HH.add(sample_category)
              else:
                raise ValueError("Failed to identify relevant HH decay mode(s) for 'sample_category' = %s !!" % sample_category)
        prep_dcard_HH = list(prep_dcard_HH)
        prep_dcard_H = []
        prep_dcard_other_nonfake_backgrounds = []
        for process in self.nonfake_backgrounds:
          if process in [ "VH", "WH", "ZH", "TH", "tHq", "tHW", "TTH", "TTWH", "TTZH", "ggH", "qqH" ]:
            prep_dcard_H.append("%s_hww" % process)
            prep_dcard_H.append("%s_hzz" % process)
            prep_dcard_H.append("%s_htt" % process)
            prep_dcard_H.append("%s_hbb" % process)
          else:
            prep_dcard_other_nonfake_backgrounds.append(process)
        self.prep_dcard_processesToCopy = [ "data_obs" ] + prep_dcard_HH + prep_dcard_H + prep_dcard_other_nonfake_backgrounds + [ "Convs", "data_fakes", "data_flips", "fakes_mc", "flips_mc" ]
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
        key_prep_dcard_dir = getKey("prepareDatacards")
        prep_dcard_job_tuple = (self.channel, category, "SS", histogramToFit)
        key_prep_dcard_job = getKey(category, "SS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : getHistogramDir(category, "Tight", "disabled", "SS"),
          'histogramToFit' : histogramToFit,
          'label' : "2lSS"
          }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

        if "OS" in self.leptonChargeSelections:
          key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
          prep_dcard_job_tuple = (self.channel, category, "OS", histogramToFit)
          key_prep_dcard_job = getKey(category, "OS", histogramToFit)
          self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
            'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple),
            'histogramDir' : getHistogramDir(category, "Tight", "disabled", "OS"),
            'histogramToFit' : histogramToFit,
            'label' : "2lOS",
            }
          self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

        # add shape templates for the following systematic uncertainties:
        #  - 'CMS_ttHl_Clos_norm_e'
        #  - 'CMS_ttHl_Clos_shape_e'
        #  - 'CMS_ttHl_Clos_norm_m'
        #  - 'CMS_ttHl_Clos_shape_m'
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, category, "SS", histogramToFit)
        key_add_syst_fakerate_job = getKey(category, "SS", histogramToFit)
        key_prep_dcard_job = getKey(category, "SS", histogramToFit)
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
          'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
          'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
          'category' : category,
          'histogramToFit' : histogramToFit,
          'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = "%s/sel/evt/fakes_mc" % getHistogramDir(category, "Tight", "disabled", "SS")
        for lepton_type in [ 'e', 'm' ]:
          lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
          if lepton_mcClosure not in self.lepton_selections:
            continue
          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
          key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, "SS")
          histogramDir_mcClosure = "%s/sel/evt/fakes_mc" % self.mcClosure_dir['%s_%s' % (lepton_mcClosure, "SS")]
          if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit:
            histogramDir_nominal = histogramDir_nominal.replace("/sel/evt", "/sel/datacard")
            histogramDir_mcClosure = histogramDir_mcClosure.replace("/sel/evt", "/sel/datacard")
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
            'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
            'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'histogramName_nominal_%s' % lepton_type : "%s/%s" % (histogramDir_nominal, histogramToFit),
            'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
            'histogramName_mcClosure_%s' % lepton_type : "%s/%s" % (histogramDir_mcClosure, histogramToFit)
          })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])
            
    logging.info("Creating configuration files to run 'makePlots'")
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
    key_makePlots_dir = getKey("makePlots")        
    key_makePlots_job = getKey("SS")
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable' : self.executable_make_plots,
      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
      'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
      'histogramDir' : self.histogramDir_prep_dcard,
      'label' : "2lSS",
      'make_plots_backgrounds' : self.make_plots_backgrounds,
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "OS" in self.leptonChargeSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_OS_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_OS.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard_OS,
        'label' : "2lOS",
        'make_plots_backgrounds' : self.make_plots_backgrounds_OS,
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_selections: #TODO
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      key_makePlots_job = getKey("Fakeable_mcClosure", "SS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel),
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFlips)
      self.sbatchFile_addFlips = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFlips_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFlips, self.sbatchFile_addFlips, self.jobOptions_addFlips)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data_withFlips(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
예제 #16
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

    Walks over all enabled samples and, for every (lepton selection,
    systematic shift, job id) combination, writes an 'analyze' config file
    and records the corresponding histogram/log/RLE output paths on the
    instance.  Afterwards it creates the sbatch submission script (if
    running on the batch system), the 'prepareDatacards' configs, and the
    Makefile tying all stages together.
    """

    # Make sure every registered output/config/log directory exists on disk.
    for key in self.dirs.keys():
      for dir_type in self.dirs[key].keys():
        create_if_not_exists(self.dirs[key][dir_type])
  
    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
      # Skip disabled samples and categories that are handled elsewhere.
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue

      process_name = sample_info["process_name_specific"]

      logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))  

      is_mc = (sample_info["type"] == "mc")
      # MC is scaled to (cross-section * integrated lumi / generated events); data gets unit weight.
      lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"]
      sample_category = sample_info["sample_category"]
      triggers = sample_info["triggers"]
      # Trigger bits are always applied to data; for MC only in 2015, or in 2016 for re-HLT samples.
      apply_trigger_bits = (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

      for lepton_selection in self.lepton_selections:
        for central_or_shift in self.central_or_shifts:

          inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
          for jobId in inputFileList.keys():
            # Systematic shifts are only evaluated for the Tight selection on MC;
            # the ttH/ttW/ttZ theory shape uncertainties only for the matching sample category.
            if central_or_shift != "central" and not (lepton_selection == "Tight"):
              continue
            if central_or_shift != "central" and not is_mc:
              continue
            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
              continue
            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
              continue
            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
              continue

            key_dir = getKey(sample_name, lepton_selection)
            key_file = getKey(sample_name, lepton_selection, central_or_shift, jobId)

            # Record the per-job input Ntuples and the paths of the config,
            # histogram, log, and (optional) run:lumi:event output files.
            self.ntupleFiles[key_file] = inputFileList[jobId]
            self.cfgFiles_analyze_modified[key_file] = os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
              (self.channel, process_name, lepton_selection, central_or_shift, jobId))
            self.histogramFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
              (process_name, lepton_selection, central_or_shift, jobId))
            self.logFiles_analyze[key_file] = os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
              (self.channel, process_name, lepton_selection, central_or_shift, jobId))
            # RLE (run:lumi:event) dump is only produced when explicitly requested.
            self.rleOutputFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % \
              (self.channel, process_name, lepton_selection, central_or_shift, jobId)) if self.select_rle_output else ""  
            self.createCfg_analyze(self.ntupleFiles[key_file], self.histogramFiles[key_file], sample_category, self.era, triggers,
              lepton_selection, 
              is_mc, central_or_shift, lumi_scale, apply_trigger_bits, self.cfgFiles_analyze_modified[key_file], self.rleOutputFiles[key_file])
                
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch()
      print self.sbatchFile_analyze
    # NOTE(review): the addFakes/addFlips config creation is disabled here;
    # presumably handled elsewhere or not needed for this channel -- confirm.
    #logging.info("Creating configuration files for executing 'addBackgroundLeptonFakes'")
    #self.createCfg_addFakes(self.histogramFile_hadd_stage1, self.histogramFile_addFakes, self.cfgFile_addFakes_modified)

    #logging.info("Creating configuration files for executing 'addBackgroundLeptonFlips'")
    #self.createCfg_addFlips(self.histogramFile_hadd_stage1, self.histogramFile_addFlips, self.cfgFile_addFlips_modified)

    logging.info("Creating configuration files for executing 'prepareDatacards'")
    # One datacard-preparation config per histogram that enters the fit.
    for histogramToFit in self.histograms_to_fit:
      self.createCfg_prep_dcard(histogramToFit)

    # Assemble the Makefile that chains analyze -> hadd stage1 ->
    # data-driven backgrounds -> hadd stage2 -> datacard preparation.
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.createMakefile(lines_makefile)
  
    logging.info("Done")
    def __init__(
        self,
        configDir,
        localDir,
        outputDir,
        output_file,
        executable,
        samples,
        max_files_per_job,
        era,
        binning,
        use_gen_weight,
        check_output_files,
        running_method,
        num_parallel_jobs,
        pool_id='',
        verbose=False,
        dry_run=False,
        use_home=False,
        keep_logs=False,
        submission_cmd=None,
    ):
        """Initialize the nonResDenom job-configuration object.

        Sets up the directory layout under ``configDir``/``localDir``/
        ``outputDir``, log-file paths, sbatch/Makefile bookkeeping, and
        (optionally) the reference generator-level weights per sample.

        Args:
            configDir: base directory for generated config files (DKEY_CFGS).
            localDir: base directory for logs, scripts, plots and the Makefile.
            outputDir: base directory for job outputs (histograms, final file).
            output_file: name of the final output file, placed under outputDir.
            executable: executable to run for each job.
            samples: dict of sample definitions, keyed by sample name.
            max_files_per_job: maximum number of input files per job.
            era: data-taking era string (also selects refGenWeight_<era>.txt).
            binning: histogram binning configuration.
            use_gen_weight: if True, load reference gen weights from CMSSW data.
            check_output_files: whether to verify job outputs.
            running_method: either "sbatch" or "makefile" (case-insensitive).
            num_parallel_jobs: number of jobs run in parallel (makefile mode).
            pool_id: sbatch pool identifier; a fresh UUID if empty.
            verbose: enable verbose output.
            dry_run: prepare everything but do not submit.
            use_home: whether to use the home directory.
            keep_logs: whether to keep per-job log files.
            submission_cmd: recorded submission command, validated against
                the SUBMISSION log by check_submission_cmd().

        Raises:
            ValueError: if ``running_method`` is neither "sbatch" nor "makefile".
        """

        self.configDir = configDir
        self.localDir = localDir
        self.outputDir = outputDir
        self.executable = executable
        # Hard cap on the total number of jobs this workflow may create.
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.binning = binning
        self.use_gen_weight = use_gen_weight
        self.check_output_files = check_output_files
        self.verbose = verbose
        self.dry_run = dry_run
        self.use_home = use_home
        self.keep_logs = keep_logs
        # Only two running modes are supported; fail fast on anything else.
        if running_method.lower() not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)

        self.running_method = running_method
        self.is_sbatch = self.running_method.lower() == "sbatch"
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(self.localDir, "Makefile_nonResDenom")
        self.num_parallel_jobs = num_parallel_jobs
        # Generate a unique pool id when the caller did not provide one.
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: %s" % self.workingDir)
        self.template_dir = os.path.join(os.getenv('CMSSW_BASE'), 'src',
                                         'tthAnalysis', 'HiggsToTauTau',
                                         'test', 'templates')
        logging.info("Templates directory is: %s" % self.template_dir)

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.localDir)
        create_if_not_exists(self.outputDir)
        self.output_file = os.path.join(self.outputDir, output_file)
        self.stdout_file_path = os.path.join(self.localDir,
                                             "stdout_nonResDenom.log")
        self.stderr_file_path = os.path.join(self.localDir,
                                             "stderr_nonResDenom.log")
        self.sw_ver_file_cfg = os.path.join(self.localDir,
                                            "VERSION_nonResDenom.log")
        self.sw_ver_file_out = os.path.join(self.outputDir,
                                            "VERSION_nonResDenom.log")
        self.submission_out = os.path.join(self.localDir,
                                           "SUBMISSION_nonResDenom.log")
        # Version the log-file names so reruns do not overwrite earlier logs.
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out = get_log_version(
            (self.stdout_file_path, self.stderr_file_path,
             self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out))
        check_submission_cmd(self.submission_out, submission_cmd)

        self.sbatchFile_nonResDenom = os.path.join(self.localDir,
                                                   "sbatch_nonResDenom.py")
        # Per-job bookkeeping maps, filled later when jobs are created.
        self.cfgFiles_nonResDenom = {}
        self.logFiles_nonResDenom = {}
        self.scriptFiles_nonResDenom = {}
        self.jobOptions_sbatch = {}

        self.inputFiles = {}
        self.outputFiles_tmp = {}
        self.outputFiles = {}

        self.phoniesToAdd = []
        self.filesToClean = [self.configDir]
        self.targets = []

        # Directory-type lists: all_dirs covers per-process output dirs,
        # cfg_dirs the subset that lives under configDir/localDir.
        self.dirs = {}
        all_dirs = [
            DKEY_CFGS, DKEY_HISTO_TMP, DKEY_HISTO, DKEY_PLOTS, DKEY_LOGS,
            DKEY_SCRIPTS, DKEY_HADD_RT
        ]
        cfg_dirs = [
            DKEY_CFGS, DKEY_LOGS, DKEY_PLOTS, DKEY_SCRIPTS, DKEY_HADD_RT
        ]

        self.gen_weights = {}
        if self.use_gen_weight:
            # Reference gen weights are read from a two-column text file
            # (<sample name> <weight>) shipped with the CMSSW installation.
            ref_genweights = os.path.join(os.environ['CMSSW_BASE'], 'src',
                                          'tthAnalysis', 'HiggsToTauTau',
                                          'data',
                                          'refGenWeight_{}.txt'.format(era))
            with open(ref_genweights, 'r') as f:
                for line in f:
                    line_split = line.strip().split()
                    assert (len(line_split) == 2)
                    sample_name = line_split[0]
                    ref_genweight = float(line_split[1])
                    # Each sample must appear at most once in the file.
                    assert (sample_name not in self.gen_weights)
                    self.gen_weights[sample_name] = ref_genweight

        for sample_name, sample_info in self.samples.items():
            if not sample_info['use_it']:
                continue
            process_name = sample_info["process_name_specific"]
            if self.use_gen_weight:
                # A '_duplicate' suffix maps back to the original sample's weight.
                assert (re.sub('_duplicate$', '', process_name)
                        in self.gen_weights)
            key_dir = getKey(process_name)
            # Per-process directories: configs/logs/etc. under configDir or
            # localDir, everything else (histograms, ...) under outputDir.
            for dir_type in all_dirs:
                if dir_type == DKEY_PLOTS:
                    continue
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in cfg_dirs:
                    dir_choice = self.configDir if dir_type == DKEY_CFGS else self.localDir
                    self.dirs[key_dir][dir_type] = os.path.join(
                        dir_choice, dir_type, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, process_name)
        # Process-independent (top-level) config/log/script directories;
        # those under localDir are scheduled for cleanup.
        for dir_type in cfg_dirs:
            initDict(self.dirs, [dir_type])
            dir_choice = self.configDir if dir_type == DKEY_CFGS else self.localDir
            self.dirs[dir_type] = os.path.join(dir_choice, dir_type)
            if dir_choice != self.configDir:
                self.filesToClean.append(self.dirs[dir_type])

        self.cvmfs_error_log = {}
        # Job counters per workflow stage.
        self.num_jobs = {
            'hadd': 0,
            'nonResDenom': 0,
            'plot': 0,
        }
예제 #18
0
    def __init__(self,
            treeName,
            outputDir,
            cfgDir,
            executable_addMEM,
            samples,
            era,
            check_output_files,
            running_method,
            max_files_per_job,
            mem_integrations_per_job,
            max_mem_integrations,
            num_parallel_jobs,
            leptonSelection,
            hadTauSelection,
            integration_choice,
            jet_cleaning_by_index,
            dry_run,
            use_nonnominal,
            use_home,
            channel,
            rle_filter_file = '',
            submission_cmd = None,
            pool_id = '',
            max_jobs_per_sample = -1,
          ):
        """Prepare the configuration for producing MEM-augmented Ntuples.

        Stores all job parameters on the instance, validates the running
        method, creates the output/config directories, sets up versioned log
        file paths, and builds the per-sample directory layout plus the
        bookkeeping dictionaries that are filled later when the config and
        submission files are generated.

        Raises:
          ValueError: if ``running_method`` is neither 'sbatch' nor 'makefile'
            (case-insensitive).
        """
        self.treeName = treeName
        self.outputDir = outputDir
        self.cfgDir = cfgDir
        self.executable_addMEM = executable_addMEM
        self.mem_integrations_per_job = mem_integrations_per_job
        self.max_files_per_job = max_files_per_job
        self.max_mem_integrations = max_mem_integrations
        self.max_jobs_per_sample = max_jobs_per_sample
        self.samples = samples
        self.era = era
        self.check_output_files = check_output_files
        self.channel = channel
        self.rle_filter_file = rle_filter_file
        self.leptonSelection = leptonSelection
        self.hadTauSelection = hadTauSelection
        if self.hadTauSelection:
            # The selection string is encoded as '<definition>|<working point>';
            # split once instead of twice as before.
            hadTau_parts = self.hadTauSelection.split('|')
            self.hadTauDefinition = hadTau_parts[0]
            self.hadTauWorkingPoint = hadTau_parts[1]
        else:
            self.hadTauDefinition = None
            self.hadTauWorkingPoint = None
        self.maxPermutations_branchName = None
        self.integration_choice = integration_choice
        self.jet_cleaning_by_index = jet_cleaning_by_index
        logging.info(
            "Number of integration points: %s" % self.integration_choice
        )
        # Validate and normalize the running method once (was lowercased
        # three separate times in the original).
        running_method_lower = running_method.lower()
        if running_method_lower not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)
        self.running_method = running_method
        self.is_sbatch = (running_method_lower == "sbatch")
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(
          self.cfgDir, "Makefile_%s" % self.channel)
        self.num_parallel_jobs = num_parallel_jobs
        self.dry_run = dry_run
        self.use_nonnominal = use_nonnominal
        self.use_home = use_home
        # Fall back to a random pool id if the caller did not provide one.
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: {workingDir}".format(workingDir = self.workingDir))

        for dirPath in [self.outputDir, self.cfgDir]:
          create_if_not_exists(dirPath)

        # Log file paths; get_log_version() appends a version suffix so that
        # logs from earlier runs are not overwritten.
        self.stdout_file_path = os.path.join(self.cfgDir, "stdout_%s.log" % self.channel)
        self.stderr_file_path = os.path.join(self.cfgDir, "stderr_%s.log" % self.channel)
        self.sw_ver_file_cfg  = os.path.join(self.cfgDir, "VERSION_%s.log" % self.channel)
        self.sw_ver_file_out  = os.path.join(self.outputDir, "VERSION_%s.log" % self.channel)
        self.submission_out   = os.path.join(self.cfgDir, "SUBMISSION_%s.log" % self.channel)
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out = get_log_version((
            self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out
        ))
        check_submission_cmd(self.submission_out, submission_cmd)

        # Bookkeeping containers filled later when config files are created.
        # NOTE: the redundant second 'self.samples = samples' assignment that
        # used to be here has been dropped (samples was not modified in between).
        self.dirs = {}
        self.cfgFiles_addMEM_modified = {}
        self.shFiles_addMEM_modified = {}
        self.logFiles_addMEM = {}
        self.sbatchFile_addMEM = os.path.join(self.cfgDir, "sbatch_addMEM_%s.py" % self.channel)
        self.inputFiles = {}
        self.outputFiles = {}
        self.hadd_records = {}
        self.filesToClean = []

        # 'sum_events' is aggregate metadata, not a real sample; drop it before
        # iterating. pop() with a default tolerates its absence (the original
        # 'del' raised KeyError when the key was missing).
        self.samples.pop('sum_events', None)
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            # Ntuple outputs live under outputDir; config/log/hadd artifacts
            # live under cfgDir.
            for dir_type in [DKEY_NTUPLES, DKEY_FINAL_NTUPLES]:
                initDict(self.dirs, [key_dir, dir_type])
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, process_name)
            for dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_HADD, DKEY_HADD_RT]:
                initDict(self.dirs, [key_dir, dir_type])
                self.dirs[key_dir][dir_type] = os.path.join(self.cfgDir, dir_type, self.channel, process_name)

        # Maps hostnames to lists of timestamps of observed CVMFS failures.
        self.cvmfs_error_log = {}
# Example #19
# 0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      process_name = sample_info["process_name_specific"]
      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
            continue

          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          for chargeSumSelection in self.chargeSumSelections:
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_selection, chargeSumSelection, sample_info
                ):
                  continue

                key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                  if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')
    for lepton_selection in self.lepton_selections:
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      hadTauVeto_selection = "Tight"
      hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])

      if lepton_selection == "forBDTtraining":
        electron_selection = "Loose"
        muon_selection = "Loose"
      elif lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for chargeSumSelection in self.chargeSumSelections:

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_selection, chargeSumSelection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_selection, chargeSumSelection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift)

              for jobId in inputFileList.keys():

                analyze_job_tuple = (process_name, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                syncOutput = ''
                syncTree = ''
                if self.do_sync:
                  if chargeSumSelection != 'OS':
                    continue
                  mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                  if lepton_selection_and_frWeight == 'Tight':
                    syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                    syncTree   = 'syncTree_%s_SR' % self.channel.replace('_', '')
                  elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights':
                    syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                    syncTree   = 'syncTree_%s_Fake' % self.channel.replace('_', '')
                  elif mcClosure_match:
                    mcClosure_type = mcClosure_match.group('type')
                    syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                    syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel.replace('_', ''), mcClosure_type)
                  else:
                    continue
                if syncTree and central_or_shift != "central":
                  syncTree = os.path.join(central_or_shift, syncTree)
                syncRLE = ''
                if self.do_sync and self.rle_select:
                  syncRLE = self.rle_select % syncTree
                  if not os.path.isfile(syncRLE):
                    logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                    continue
                if syncOutput:
                  self.inputFiles_sync['sync'].append(syncOutput)

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                branchName_memOutput = '%s_%s' % (self.MEMbranch, self.get_addMEM_systematics(central_or_shift)) \
                                       if self.MEMbranch else ''

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'hadTauSelection'          : hadTauVeto_selection,
                  'chargeSumSelection'       : chargeSumSelection,
                  'applyFakeRateWeights'     : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'selectBDT'                : self.isBDTtraining,
                  'branchName_memOutput'     : branchName_memOutput,
                  'syncOutput'               : syncOutput,
                  'syncTree'                 : syncTree,
                  'syncRLE'                  : syncRLE,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'useNonNominal'            : self.use_nonnominal,
                  'fillGenEvtHistograms'     : True,
                  'isControlRegion'          : self.isControlRegion,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection)
                hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, chargeSumSelection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.isBDTtraining or self.do_sync:
              continue

            # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection)
            key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, chargeSumSelection)
            hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, chargeSumSelection)
            key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

          if self.isBDTtraining or self.do_sync:
            continue

          ## doing list of processes to make the hadd in _Convs and _fake
          ## we could remove the tH ones with althernative couplings
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          sample_categories.extend(self.ttHProcs)
          processes_input_base = self.get_processes_input_base(sample_categories)

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_fake, TTW_fake, TTWW_fake, ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          processes_input = []
          for process_input_base in processes_input_base:
            if "HH" in process_input_base:
              continue
            processes_input.append("%s_fake" % process_input_base)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(self.channel, lepton_selection, lepton_frWeight, chargeSumSelection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum conversion background contributions for the total of all MC sample
          # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          processes_input = []
          for process_input_base in self.convs_backgrounds:
            if "HH" in process_input_base:
              continue
            processes_input.append("%s_Convs" % process_input_base)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(self.channel, lepton_selection, lepton_frWeight, chargeSumSelection) ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, chargeSumSelection)
          key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, chargeSumSelection)
          hadd_stage2_job_tuple = (lepton_selection_and_frWeight, chargeSumSelection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    if self.isBDTtraining or self.do_sync:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        if self.isBDTtraining:
          self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        elif self.do_sync:
          self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      if self.isBDTtraining:
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
      elif self.do_sync:
        self.addToMakefile_syncNtuple(lines_makefile)
        outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
        self.outputFile_sync['sync'] = outputFile_sync_path
        self.addToMakefile_hadd_sync(lines_makefile)
      else:
        raise ValueError("Internal logic error")
      self.addToMakefile_validate(lines_makefile)
      self.targets.extend(self.phoniesToAdd)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for chargeSumSelection in self.chargeSumSelections:
      key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), chargeSumSelection)
      key_addFakes_dir = getKey("addBackgroundLeptonFakes")
      key_addFakes_job = getKey("data_fakes", chargeSumSelection)
      category_sideband = "{}_{}_Fakeable_wFakeRateWeights".format(self.channel, chargeSumSelection)
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % chargeSumSelection),
        'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % chargeSumSelection),
        'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % chargeSumSelection),
        'category_signal' : "{}_{}_Tight".format(self.channel, chargeSumSelection),
        'category_sideband' : category_sideband
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), chargeSumSelection)
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      key_prep_dcard_dir = getKey("prepareDatacards")
      if "OS" in self.chargeSumSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
        prep_dcard_job_tuple = (self.channel, "OS", histogramToFit)
        key_prep_dcard_job = getKey("OS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard,
          'histogramToFit' : histogramToFit,
          'label' : None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      if "SS" in self.chargeSumSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
        prep_dcard_job_tuple = (self.channel, "SS", histogramToFit)
        key_prep_dcard_job = getKey("SS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard_SS,
          'histogramToFit' : histogramToFit,
          'label' : 'SS'
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      for chargeSumSelection in self.chargeSumSelections:
        key_prep_dcard_job = getKey(chargeSumSelection, histogramToFit)
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), chargeSumSelection)
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, chargeSumSelection, histogramToFit)
        key_add_syst_fakerate_job = getKey(chargeSumSelection, histogramToFit)
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
          'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
          'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
          'category' : self.channel,
          'histogramToFit' : histogramToFit,
          'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = None
        if chargeSumSelection == "OS":
          histogramDir_nominal = self.histogramDir_prep_dcard
        elif chargeSumSelection == "SS":
          histogramDir_nominal = self.histogramDir_prep_dcard_SS
        else:
          raise ValueError("Invalid parameter 'chargeSumSelection' = %s !!" % chargeSumSelection)
        for lepton_type in [ 'e', 'm' ]:
          lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
          if lepton_mcClosure not in self.lepton_selections:
            continue
          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
          key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, chargeSumSelection)
          histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, chargeSumSelection)]
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
            'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
            'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
            'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
            'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
          })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_dir = getKey("makePlots")
    if "OS" in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : self.channel,
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      key_makePlots_job = getKey("SS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard_SS,
        'label' : "{} SS".format(self.channel),
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_selections: #TODO
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
    self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
    self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
  def create(self):
    """Create all necessary config files and run the complete analysis workflow -- either locally or on the batch system.

    Builds, in order: the directory tree registered in self.dirs, one analysis
    config per (sample, charge selection, systematic shift, job), an optional
    sbatch submission script, the 'comp_jetToTauFakeRate' configs, and finally
    the Makefile tying all steps together.
    """

    # Materialize every directory registered in self.dirs (two-level dict:
    # key -> dir_type -> path).
    for key in self.dirs.keys():
      for dir_type in self.dirs[key].keys():
        create_if_not_exists(self.dirs[key][dir_type])
  
    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
      # Skip disabled samples and categories handled by dedicated workflows.
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue

      process_name = sample_info["process_name_specific"]

      logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

      is_mc = (sample_info["type"] == "mc")
      # MC is scaled to the target luminosity via xsection * lumi / N_events;
      # data keeps a unit weight.
      lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"]
      apply_genWeight = sample_info["apply_genWeight"] if (is_mc and "apply_genWeight" in sample_info.keys()) else False
      sample_category = sample_info["sample_category"]
      triggers = sample_info["triggers"]
      # Trigger bits are always applied to data; for MC only in eras/samples
      # where they are usable (2015, or re-HLT 2016 samples).
      apply_trigger_bits = (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

      for charge_selection in self.charge_selections:
        for central_or_shift in self.central_or_shifts:

          inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
          for jobId in inputFileList.keys():
            # Systematic shifts apply only to MC; theory-shape uncertainties
            # apply only to the process they belong to.
            if central_or_shift != "central" and not is_mc:
              continue
            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
              continue
            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
              continue
            if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
              continue

            # One config/histogram/log file triple per job.
            key_dir = getKey(sample_name, charge_selection)
            key_file = getKey(sample_name, charge_selection, central_or_shift, jobId)

            self.ntupleFiles[key_file] = inputFileList[jobId]
            self.cfgFiles_analyze_modified[key_file] = os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
              (self.channel, process_name, charge_selection, central_or_shift, jobId))
            self.histogramFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
              (process_name, charge_selection, central_or_shift, jobId))
            self.logFiles_analyze[key_file] = os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
              (self.channel, process_name, charge_selection, central_or_shift, jobId))

            # Write the per-job analysis config with all selections and weights.
            self.createCfg_analyze(self.ntupleFiles[key_file], self.histogramFiles[key_file], sample_category, self.era, triggers,
              charge_selection, self.jet_minPt, self.jet_maxPt, self.jet_minAbsEta, self.jet_maxAbsEta, self.hadTau_selections, self.absEtaBins,
              is_mc, central_or_shift, lumi_scale, apply_genWeight, apply_trigger_bits, self.cfgFiles_analyze_modified[key_file])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      # NOTE(review): called without arguments here, unlike sibling workflows
      # that pass (executable, sbatchFile, jobOptions) -- confirm this class
      # overrides createScript_sbatch with a zero-argument signature.
      self.createScript_sbatch()

    logging.info("Creating configuration files for executing 'comp_jetToTauFakeRate'")
    for charge_selection in self.charge_selections:
      # Fake rate = numerator / denominator histogram directories, computed
      # per charge selection from the stage-1 harvested histograms.
      self.histogramFile_comp_jetToTauFakeRate[charge_selection] = os.path.join(
        self.outputDir, DKEY_HIST, "comp_jetToTauFakeRate_%s.root" % charge_selection)
      self.histogramDir_numerator[charge_selection] = "jetToTauFakeRate_%s/numerator/" % charge_selection
      self.histogramDir_denominator[charge_selection] = "jetToTauFakeRate_%s/denominator/" % charge_selection
      self.cfgFile_comp_jetToTauFakeRate_modified[charge_selection] = os.path.join(
        self.outputDir, DKEY_CFGS, "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection)
      self.createCfg_comp_jetToTauFakeRate(self.histogramFile_hadd_stage1, self.histogramFile_comp_jetToTauFakeRate[charge_selection],
        self.histogramDir_denominator[charge_selection], self.histogramDir_numerator[charge_selection], self.absEtaBins, self.ptBins,
        self.cfgFile_comp_jetToTauFakeRate_modified[charge_selection])

    # Assemble the Makefile that chains analysis -> hadd -> fake-rate steps.
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.createMakefile(lines_makefile)
  
    logging.info("Done")
예제 #21
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            for charge_selection in self.charge_selections:
                key_dir = getKey(process_name, charge_selection)
                for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                    initDict(self.dirs, [key_dir, dir_type])
                    if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                        self.dirs[key_dir][dir_type] = os.path.join(
                            self.configDir, dir_type, self.channel,
                            "_".join([charge_selection]), process_name)
                    else:
                        self.dirs[key_dir][dir_type] = os.path.join(
                            self.outputDir, dir_type, self.channel,
                            "_".join([charge_selection]), process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        ##print "self.dirs = ", self.dirs

        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue

            process_name = sample_info["process_name_specific"]

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            is_mc = (sample_info["type"] == "mc")
            lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info[
                "xsection"] * self.lumi / sample_info["nof_events"]
            apply_genWeight = sample_info["apply_genWeight"] if (
                is_mc and "apply_genWeight" in sample_info.keys()) else False
            sample_category = sample_info["sample_category"]
            triggers = sample_info["triggers"]
            apply_trigger_bits = (
                is_mc and
                (self.era == "2015" or
                 (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

            for charge_selection in self.charge_selections:
                for central_or_shift in self.central_or_shifts:

                    inputFileList = inputFileLists[sample_name]
                    for jobId in inputFileList.keys():
                        if central_or_shift != "central" and not is_mc:
                            continue
                        if central_or_shift.startswith(
                                "CMS_ttHl_thu_shape_ttH"
                        ) and sample_category != "signal":
                            continue
                        if central_or_shift.startswith(
                                "CMS_ttHl_thu_shape_ttW"
                        ) and sample_category != "TTW":
                            continue
                        if central_or_shift.startswith(
                                "CMS_ttHl_thu_shape_ttZ"
                        ) and sample_category != "TTZ":
                            continue

                        # build config files for executing analysis code
                        key_dir = getKey(process_name, charge_selection)
                        key_analyze_job = getKey(process_name,
                                                 charge_selection,
                                                 central_or_shift, jobId)

                        ntupleFiles = inputFileList[jobId]
                        if len(ntupleFiles) == 0:
                            print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                                key_job, ntupleFiles)
                            continue
                        self.jobOptions_analyze[key_analyze_job] = {
                          'ntupleFiles' : ntupleFiles,
                          'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                            (self.channel, process_name, charge_selection, central_or_shift, jobId)),
                          'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                            (process_name, charge_selection, central_or_shift, jobId)),
                          'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                            (self.channel, process_name, charge_selection, central_or_shift, jobId)),
                          'sample_category' : sample_category,
                          'triggers' : sample_info["triggers"],
                          'charge_selection' : charge_selection,
                          'jet_minPt' : self.jet_minPt,
                          'jet_maxPt' : self.jet_maxPt,
                          'jet_minAbsEta' : self.jet_minAbsEta,
                          'jet_maxAbsEta' : self.jet_maxAbsEta,
                          'hadTau_selection_denominator' : self.hadTau_selection_denominator,
                          'hadTau_selections_numerator' : self.hadTau_selections_numerator,
                          'absEtaBins' : self.absEtaBins,
                          ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
                          'use_HIP_mitigation_mediumMuonId' : True,
                          'is_mc' : is_mc,
                          'central_or_shift' : central_or_shift,
                          'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                          'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                          'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                        }
                        self.createCfg_analyze(
                            self.jobOptions_analyze[key_analyze_job])

                        # initialize input and output file names for hadd_stage1
                        key_hadd_stage1 = getKey(process_name,
                                                 charge_selection)
                        if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                        self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                            self.jobOptions_analyze[key_analyze_job]
                            ['histogramFile'])
                        self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                          (self.channel, process_name, charge_selection))

                # initialize input and output file names for hadd_stage2
                key_hadd_stage1 = getKey(process_name, charge_selection)
                key_hadd_stage2 = getKey(charge_selection)
                if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                    self.outputFile_hadd_stage1[key_hadd_stage1])
                self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s.root" % \
                  (self.channel, charge_selection))

        logging.info(
            "Creating configuration files for executing 'comp_jetToTauFakeRate'"
        )
        for charge_selection in self.charge_selections:
            key_comp_jetToTauFakeRate_job = getKey(charge_selection)
            key_hadd_stage2 = getKey(charge_selection)
            self.jobOptions_comp_jetToTauFakeRate[
                key_comp_jetToTauFakeRate_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection),
                    'outputFile':
                    os.path.join(
                        self.dirs[DKEY_HIST],
                        "comp_jetToTauFakeRate_%s.root" % charge_selection),
                    'logFile':
                    os.path.join(
                        self.dirs[DKEY_LOGS],
                        "comp_jetToTauFakeRate_%s.log" % charge_selection),
                    'looseRegion':
                    "jetToTauFakeRate_%s/denominator/" % charge_selection,
                    'tightRegion':
                    "jetToTauFakeRate_%s/numerator/" % charge_selection,
                    'absEtaBins':
                    self.absEtaBins,
                    'ptBins':
                    self.ptBins
                }
            self.createCfg_comp_jetToTauFakeRate(
                self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job])
            self.targets.append(self.jobOptions_comp_jetToTauFakeRate[
                key_comp_jetToTauFakeRate_job]['outputFile'])

        logging.info("Creating configuration files to run 'makePlots'")
        for charge_selection in self.charge_selections:
            key_makePlots_job = getKey(charge_selection)
            key_hadd_stage2 = getKey(charge_selection)
            self.jobOptions_make_plots[key_makePlots_job] = {
                'executable':
                self.executable_make_plots,
                'inputFile':
                self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified':
                os.path.join(self.dirs[DKEY_CFGS],
                             "makePlots_%s_cfg.py" % self.channel),
                'outputFile':
                os.path.join(self.dirs[DKEY_PLOT],
                             "makePlots_%s.png" % self.channel),
                'histogramDir':
                "jetToTauFakeRate_%s" % charge_selection,
                'label':
                None,
                'make_plots_backgrounds': ["TT", "TTW", "TTZ", "EWK", "Rares"],
            }
            self.createCfg_makePlots(
                self.jobOptions_make_plots[key_makePlots_job])
            self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
            for absEtaBin in ["absEtaLt1_5", "absEta1_5to9_9"]:
                key_makePlots_job = getKey(charge_selection, absEtaBin,
                                           "denominator")
                key_hadd_stage2 = getKey(charge_selection)
                self.jobOptions_make_plots[key_makePlots_job] = {
                    'executable':
                    self.executable_make_plots,
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "makePlots_%s_%s_denominator_%s_cfg.py" %
                        (self.channel, charge_selection, absEtaBin)),
                    'outputFile':
                    os.path.join(
                        self.dirs[DKEY_PLOT],
                        "makePlots_%s_%s_denominator_%s.png" %
                        (self.channel, charge_selection, absEtaBin)),
                    'histogramDir':
                    "jetToTauFakeRate_%s/denominator/%s" %
                    (charge_selection, absEtaBin),
                    'label':
                    None,
                    'make_plots_backgrounds':
                    ["TT", "TTW", "TTZ", "EWK", "Rares"],
                }
                self.createCfg_makePlots(
                    self.jobOptions_make_plots[key_makePlots_job])
                for hadTau_selection_numerator in self.hadTau_selections_numerator:
                    key_makePlots_job = getKey(charge_selection, absEtaBin,
                                               "numerator",
                                               hadTau_selection_numerator)
                    key_hadd_stage2 = getKey(charge_selection)
                    self.jobOptions_make_plots[key_makePlots_job] = {
                        'executable':
                        self.executable_make_plots,
                        'inputFile':
                        self.outputFile_hadd_stage2[key_hadd_stage2],
                        'cfgFile_modified':
                        os.path.join(
                            self.dirs[DKEY_CFGS],
                            "makePlots_%s_%s_numerator_%s_%s_cfg.py" %
                            (self.channel, charge_selection,
                             hadTau_selection_numerator, absEtaBin)),
                        'outputFile':
                        os.path.join(
                            self.dirs[DKEY_PLOT],
                            "makePlots_%s_%s_numerator_%s_%s.png" %
                            (self.channel, charge_selection,
                             hadTau_selection_numerator, absEtaBin)),
                        'histogramDir':
                        "jetToTauFakeRate_%s/numerator/%s/%s" %
                        (charge_selection, hadTau_selection_numerator,
                         absEtaBin),
                        'label':
                        None,
                        'make_plots_backgrounds':
                        ["TT", "TTW", "TTZ", "EWK", "Rares"],
                    }
                    self.createCfg_makePlots(
                        self.jobOptions_make_plots[key_makePlots_job])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_comp_jetToTauFakeRate)
            self.sbatchFile_comp_jetToTauFakeRate = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
            self.createScript_sbatch(self.executable_comp_jetToTauFakeRate,
                                     self.sbatchFile_comp_jetToTauFakeRate,
                                     self.jobOptions_comp_jetToTauFakeRate)

        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
        self.addToMakefile_make_plots(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
예제 #22
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      process_name = sample_info["process_name_specific"]
      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          if lepton_frWeight == "disabled" and not lepton_selection in ["Tight"]:
            continue

          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          for lepton_charge_selection in self.lepton_charge_selections:

            if 'mcClosure' in lepton_selection and lepton_charge_selection != 'SS':
              # Run MC closure only for the region that complements the SR
              continue

            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_selection, lepton_charge_selection, sample_info
                ):
                  continue

                key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                  if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, lepton_charge_selection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, lepton_charge_selection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "addBackgroundLeptonFlips", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)                
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")    

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')
    for lepton_selection in self.lepton_selections:
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      hadTauVeto_selection = "Tight"
      hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])

      if lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for lepton_charge_selection in self.lepton_charge_selections:

          if 'mcClosure' in lepton_selection and lepton_charge_selection != 'SS':
            # Run MC closure only for the region that complements the SR
            continue

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue

            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_selection, lepton_charge_selection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_selection, lepton_charge_selection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift)

              for jobId in inputFileList.keys():

                analyze_job_tuple = (process_name, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                syncOutput = ''
                syncTree = ''
                syncGenMatch = self.lepton_genMatches_nonfakes
                if self.do_sync:
                  mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                  if lepton_selection_and_frWeight == 'Tight':
                    if lepton_charge_selection == 'SS':
                      syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                      syncTree = 'syncTree_%s_SR' % self.channel
                    elif lepton_charge_selection == 'OS':
                      syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Flip.root' % (self.channel, central_or_shift))
                      syncTree = 'syncTree_%s_Flip' % self.channel
                    else:
                      continue
                  elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights' and lepton_charge_selection == 'SS':
                    syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                    syncTree = 'syncTree_%s_Fake' % self.channel
                  elif mcClosure_match and lepton_charge_selection == 'SS':
                    mcClosure_type = mcClosure_match.group('type')
                    syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                    syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel, mcClosure_type)
                  else:
                    continue
                if syncTree and central_or_shift != "central":
                  syncTree = os.path.join(central_or_shift, syncTree)
                syncRLE = ''
                if self.do_sync and self.rle_select:
                  syncRLE = self.rle_select % syncTree
                  if not os.path.isfile(syncRLE):
                    logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                    continue
                if syncOutput:
                  self.inputFiles_sync['sync'].append(syncOutput)

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'leptonChargeSelection'    : lepton_charge_selection,
                  'hadTauSelection_veto'     : hadTauVeto_selection,
                  'applyFakeRateWeights'     : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'syncOutput'               : syncOutput,
                  'syncTree'                 : syncTree,
                  'syncRLE'                  : syncRLE,
                  'useNonNominal'            : self.use_nonnominal,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'syncGenMatch'             : syncGenMatch,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection)
                hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, lepton_charge_selection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.do_sync: continue

            if is_mc:
              logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)

              sample_categories = [ sample_category ]
              for sample_category in sample_categories:
                # sum non-fake and fake contributions for each MC sample separately
                genMatch_categories = [ "nonfake", "Convs", "fake", "flip" ]
                for genMatch_category in genMatch_categories:
                  key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection)
                  key_addBackgrounds_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection, "addBackgrounds")
                  addBackgrounds_job_tuple = None
                  processes_input = None
                  process_output = None
                  if genMatch_category == "nonfake":
                    # sum non-fake contributions for each MC sample separately
                    # input processes: TT2l0g0j; ...
                    # output processes: TT; ...
                    if sample_category in self.ttHProcs:
                      lepton_genMatches = []
                      lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                      lepton_genMatches.extend(self.lepton_genMatches_Convs)
                      processes_input = []
                      processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in lepton_genMatches ])
                      processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in lepton_genMatches ])
                      processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in lepton_genMatches ])
                      processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in lepton_genMatches ])
                      processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in lepton_genMatches ])
                    else:
                      processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_nonfakes ]
                    process_output = sample_category
                    addBackgrounds_job_tuple = (process_name, sample_category, lepton_selection_and_frWeight, lepton_charge_selection)
                  elif genMatch_category == "Convs":
                    # sum conversion background  contributions for each MC sample separately
                    # input processes: TT1l1g0j, TT0l2g0j; ...
                    # output processes: TT_Convs; ...
                    if sample_category in self.ttHProcs:
                      processes_input = []
                      processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_Convs ])
                      processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_Convs ])
                      processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_Convs ])
                      processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_Convs ])
                      processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_Convs ])
                    else:
                      processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_Convs ]
                    process_output = "%s_Convs" % sample_category
                    addBackgrounds_job_tuple = (process_name, "%s_Convs" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection)
                  elif genMatch_category == "fake":
                    # sum fake contributions for each MC sample separately
                    # input processes: TT1l0g1j, TT0l1g1j, TT0l0g2j; ...
                    # output processes: TT_fake; ...
                    if sample_category in self.ttHProcs:
                      processes_input = []
                      processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                      processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                      processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                      processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                      processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    else:
                      processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                    process_output = "%s_fake" % sample_category
                    addBackgrounds_job_tuple = (process_name, "%s_fake" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection)
                  elif genMatch_category == "flip":
                    # sum flip contributions for each MC sample separately
                    # input processes:  TT2l2f0g0j&2t0e0m0j, TT2l1f0g0j&2t0e0m0j; ...
                    # output processes: TT_flip; ...
                    if sample_category in self.ttHProcs:
                      processes_input = []
                      processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_flips ])
                      processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_flips ])
                      processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_flips ])
                      processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_flips ])
                      processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_flips ])
                    else:
                      processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_flips ]
                    process_output = "%s_flip" % sample_category
                    addBackgrounds_job_tuple = (process_name, "%s_flip" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection)
                  if processes_input:
                    logging.info(" ...for genMatch option = '%s'" % genMatch_category)
                    key_addBackgrounds_job = getKey(*addBackgrounds_job_tuple)
                    cfgFile_modified = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_tuple)
                    outputFile = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_tuple)
                    self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                      'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job],
                      'cfgFile_modified' : cfgFile_modified,
                      'outputFile' : outputFile,
                      'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
                      'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ],
                      'processes_input' : processes_input,
                      'process_output' : process_output
                    }
                    self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                    # initialize input and output file names for hadd_stage1_5
                    key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, lepton_charge_selection)
                    hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, lepton_charge_selection)
                    key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
                    if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                      self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                    self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

            # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
            if not is_mc:
              key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection)
              key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection)
              if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

          if self.do_sync: continue

          # sum fake background contributions for the total of all MC samples
          # input processes: TT1l0g1j, TT0l1g1j, TT0l0g2j; ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, lepton_charge_selection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          sample_categories.extend(self.ttHProcs)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum flip background contributions for the total of all MC sample
          # input processes: TT2l1f0g0j,TT2l2f0g0j; ...
          # output process: flips_mc
          addBackgrounds_job_flips_tuple = ("flips_mc", lepton_selection_and_frWeight, lepton_charge_selection)
          key_addBackgrounds_job_flips = getKey(*addBackgrounds_job_flips_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          sample_categories.extend(self.ttHProcs)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_flip" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_flips_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_flips_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_flips_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ],
            'processes_input' : processes_input,
            'process_output' : "flips_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips])

          # sum conversion background contributions for the total of all MC samples
          # input processes: TT1l1g0j, TT0l2g0j; ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, lepton_charge_selection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          sample_categories.extend(self.ttHProcs)
          processes_input = []
          for sample_category in self.convs_backgrounds:
            processes_input.append("%s_Convs" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection)
          key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, lepton_charge_selection)
          hadd_stage2_job_tuple = (lepton_selection_and_frWeight, lepton_charge_selection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    if self.do_sync:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_syncNtuple(lines_makefile)
      outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
      self.outputFile_sync['sync'] = outputFile_sync_path
      self.addToMakefile_hadd_sync(lines_makefile)
      self.addToMakefile_validate(lines_makefile)
      self.targets.extend(self.phoniesToAdd)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for lepton_charge_selection in self.lepton_charge_selections:
      key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), lepton_charge_selection)
      key_addFakes_dir = getKey("addBackgroundLeptonFakes")
      addFakes_job_tuple = (lepton_charge_selection)
      key_addFakes_job = getKey("data_fakes", lepton_charge_selection)
      category_sideband = None
      if self.applyFakeRateWeights == "2lepton":
        category_sideband = "ttWctrl_%s_Fakeable_wFakeRateWeights" % lepton_charge_selection
      else:
        raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % self.applyFakeRateWeights)
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % addFakes_job_tuple),
        'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % addFakes_job_tuple),
        'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % addFakes_job_tuple),
        'category_signal' : "ttWctrl_%s_Tight" % lepton_charge_selection,
        'category_sideband' : category_sideband
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), lepton_charge_selection)
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    #--------------------------------------------------------------------------
    # CV: add histograms in OS and SS regions,
    #     so that "data_fakes" background can be subtracted from OS control region used to estimate charge flip background
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    key_addFakes_job = getKey("data_fakes", "OS")
    key_hadd_stage1_6_dir = getKey("hadd", get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    if key_hadd_stage1_6_job not in self.inputFiles_hadd_stage1_6:
        self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job] = []
    self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])
    self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
    self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job] = os.path.join(self.dirs[key_hadd_stage1_6_dir][DKEY_HIST], "hadd_stage1_6_Tight_OS.root")
    #--------------------------------------------------------------------------

    logging.info("Creating configuration files to run 'addBackgroundFlips'")
    key_addFlips_dir = getKey("addBackgroundLeptonFlips")
    key_addFlips_job = getKey("data_flips")
    self.jobOptions_addFlips[key_addFlips_job] = {
      'inputFile' : self.outputFile_hadd_stage1_6,
      'cfgFile_modified' : os.path.join(self.dirs[key_addFlips_dir][DKEY_CFGS], "addBackgroundLeptonFlips_cfg.py"),
      'outputFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_HIST], "addBackgroundLeptonFlips.root"),
      'logFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_LOGS], "addBackgroundLeptonFlips.log"),
      'category_signal' : "ttWctrl_SS_Tight",
      'category_sideband' : "ttWctrl_OS_Tight"
    }
    self.createCfg_addFlips(self.jobOptions_addFlips[key_addFlips_job])
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFlips[key_addFlips_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      key_prep_dcard_dir = getKey("prepareDatacards")
      prep_dcard_job_tuple = (self.channel, histogramToFit)
      key_prep_dcard_job = getKey(histogramToFit)      
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
        'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple),
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      key_add_syst_fakerate_dir = getKey("addSystFakeRates")
      add_syst_fakerate_job_tuple = (self.channel, histogramToFit)
      key_add_syst_fakerate_job = getKey(histogramToFit)
      self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
        'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
        'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
        'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s.root" % add_syst_fakerate_job_tuple),
        'category' : self.channel,
        'histogramToFit' : histogramToFit,
        'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
      }
      histogramDir_nominal = self.histogramDir_prep_dcard
      for lepton_type in [ 'e', 'm' ]:
        lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
        if lepton_mcClosure not in self.lepton_selections:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
        key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, 'SS')
        histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, 'SS')]
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
          'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
          'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
          'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
          'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
        })
      self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

      logging.info("Creating configuration files to run 'makePlots'")
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      key_makePlots_dir = getKey("makePlots")
      key_makePlots_job = getKey("SS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : "t#bar{t}W control region",
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
    self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
    self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
    self.sbatchFile_addFlips = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFlips_%s.py" % self.channel)
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFlips)
      self.createScript_sbatch(self.executable_addFlips, self.sbatchFile_addFlips, self.jobOptions_addFlips)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data_withFlips(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
# ===== Example #23 =====
    def create(self):
        """Creates all necessary config files for producing the sync Ntuple and schedules the jobs -- either locally or on the batch system.

        Returns:
            int: the total number of jobs configured (self.num_jobs).
        """

        # Build the nested self.dirs dictionary: one entry per
        # (process, systematic-shift) pair, plus dummy entries
        # ("hadd", "copyHistograms", "addBackgrounds") used by later stages.
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            # "" stands for the nominal (no-shift) case
            central_or_shifts_extended = [""]
            central_or_shifts_extended.extend(self.central_or_shifts)
            central_or_shifts_extended.extend(
                ["hadd", "copyHistograms", "addBackgrounds"])
            for central_or_shift_or_dummy in central_or_shifts_extended:
                process_name_extended = [process_name, "hadd"]
                for process_name_or_dummy in process_name_extended:
                    # skip meaningless (dummy process, real shift) and
                    # (dummy shift, dummy process) combinations
                    if process_name_or_dummy in [
                            "hadd"
                    ] and central_or_shift_or_dummy != "":
                        continue
                    if central_or_shift_or_dummy in [
                            "hadd", "copyHistograms", "addBackgrounds"
                    ] and process_name_or_dummy in ["hadd"]:
                        continue
                    key_dir = getKey(process_name_or_dummy,
                                     central_or_shift_or_dummy)
                    for dir_type in [
                            DKEY_CFGS, DKEY_LOGS, DKEY_RLES, DKEY_SYNC
                    ]:
                        initDict(self.dirs, [key_dir, dir_type])
                        # configs and logs live under configDir,
                        # everything else under outputDir
                        if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                            self.dirs[key_dir][dir_type] = os.path.join(
                                self.configDir, dir_type, self.channel,
                                process_name_or_dummy,
                                central_or_shift_or_dummy)
                        else:
                            self.dirs[key_dir][dir_type] = os.path.join(
                                self.outputDir, dir_type, self.channel,
                                process_name_or_dummy,
                                central_or_shift_or_dummy)

        # Per-stage subdirectories shared by all samples
        for subdirectory in [
                "addBackgrounds", "addBackgroundLeptonFakes",
                "prepareDatacards", "addSystFakeRates", "makePlots"
        ]:
            key_dir = getKey(subdirectory)
            for dir_type in [
                    DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD,
                    DKEY_PLOT
            ]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, subdirectory)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, subdirectory)
        # Channel-wide top-level directories
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_SYNC, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HADD_RT]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)

        # Create every directory on disk, logging coarse progress in percent
        numDirectories = 0
        for key in self.dirs.keys():
            if isinstance(self.dirs[key], dict):
                numDirectories += len(self.dirs[key])
            else:
                numDirectories += 1
        logging.info("Creating directory structure (numDirectories = %i)" %
                     numDirectories)
        numDirectories_created = 0
        frac = 1
        for key in self.dirs.keys():
            if isinstance(self.dirs[key], dict):
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
                numDirectories_created += len(self.dirs[key])
            else:
                create_if_not_exists(self.dirs[key])
                numDirectories_created = numDirectories_created + 1
            # emit every whole percent that the running count has crossed
            while 100 * numDirectories_created >= frac * numDirectories:
                logging.info(" %i%% completed" % frac)
                frac = frac + 1
        logging.info("Done.")

        # Split the input files of each sample into per-job chunks
        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        # One analysis config per (sample, jobId, systematic shift)
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            for jobId in inputFileList.keys():
                for central_or_shift in self.central_or_shifts:
                    logging.info(" ... for systematic uncertainty %s" %
                                 central_or_shift)

                    key_analyze_dir = getKey(process_name, central_or_shift)
                    analyze_job_tuple = (process_name, central_or_shift, jobId)
                    key_analyze_job = getKey(*analyze_job_tuple)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        # warn via logging (consistent with the other
                        # workflows) instead of a bare print()
                        logging.warning(
                            "No input ntuples for %s --> skipping job !!" %
                            key_analyze_job)
                        continue

                    syncOutput = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_SYNC],
                        '%s_%s.root' % (self.channel, central_or_shift))
                    # shifted trees are placed in a subdirectory named
                    # after the shift; the nominal tree stays at the top
                    syncOutputTree = self.output_tree if central_or_shift == "central" else os.path.join(
                        central_or_shift, self.output_tree)
                    self.inputFiles_sync['sync'].append(syncOutput)

                    cfgFile_modified_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_CFGS],
                        "analyze_%s_%s_%i_cfg.py" % analyze_job_tuple)
                    logFile_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_LOGS],
                        "analyze_%s_%s_%i.log" % analyze_job_tuple)

                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles':
                        ntupleFiles,
                        'cfgFile_modified':
                        cfgFile_modified_path,
                        'histogramFile':
                        '',
                        'logFile':
                        logFile_path,
                        'syncTree':
                        syncOutputTree,
                        'syncOutput':
                        syncOutput,
                        # an RLE path containing '%s' is a per-sample
                        # pattern, not a concrete file -> pass only if concrete
                        'syncRLE':
                        self.rle_select if self.rle_select
                        and '%s' not in self.rle_select else '',
                        'useNonNominal':
                        self.use_nonnominal,
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job], sample_info)

        # Batch-submission script, Makefile, and the final hadd target
        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_syncNtuple(self.executable_analyze,
                                            self.sbatchFile_analyze,
                                            self.jobOptions_analyze)
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_syncNtuple(lines_makefile)
        outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC,
                                            '%s.root' % self.channel)
        self.outputFile_sync['sync'] = outputFile_sync_path
        self.addToMakefile_hadd_sync(lines_makefile)
        self.targets.extend(self.phoniesToAdd)
        self.createMakefile(lines_makefile)
        logging.info("Done")
        return self.num_jobs
# ===== Example #24 =====
    def create(self):
        """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

        Builds the directory structure, writes one analysis config per
        (sample, charge selection, systematic shift, jobId), wires the
        hadd stages, the 'comp_jetToTauFakeRate' fits and the 'makePlots'
        jobs, then emits the sbatch scripts and the Makefile.

        Returns:
            int: the total number of jobs configured (self.num_jobs).
        """

        # Build the nested self.dirs dictionary: one entry per
        # (process, charge selection, systematic-shift) combination,
        # plus dummy "hadd"/"addBackgrounds" entries for the later stages.
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")

            logging.info("Building dictionaries for sample %s..." %
                         process_name)
            for charge_selection in self.charge_selections:
                # "" stands for the nominal (no-shift) case
                central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
                central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
                for central_or_shift_or_dummy in central_or_shifts_extended:
                    process_name_extended = [process_name, "hadd"]
                    for process_name_or_dummy in process_name_extended:
                        # skip the meaningless (dummy, dummy) combination
                        if central_or_shift_or_dummy in [
                                "hadd"
                        ] and process_name_or_dummy in ["hadd"]:
                            continue
                        # real (non-dummy, non-central) shifts only apply to
                        # MC, and only if the sample accepts that shift
                        if central_or_shift_or_dummy != "central" and central_or_shift_or_dummy not in central_or_shift_extensions:
                            if not is_mc:
                                continue
                            if not self.accept_central_or_shift(
                                    central_or_shift_or_dummy, sample_info):
                                continue

                        key_dir = getKey(process_name_or_dummy,
                                         charge_selection,
                                         central_or_shift_or_dummy)
                        for dir_type in [
                                DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES
                        ]:
                            initDict(self.dirs, [key_dir, dir_type])
                            # configs and logs live under configDir (split by
                            # shift); output dirs live under outputDir and are
                            # NOT split by shift -- the file names encode it
                            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                                self.dirs[key_dir][dir_type] = os.path.join(
                                    self.configDir, dir_type, self.channel,
                                    "_".join([charge_selection]),
                                    process_name_or_dummy,
                                    central_or_shift_or_dummy)
                            else:
                                self.dirs[key_dir][dir_type] = os.path.join(
                                    self.outputDir, dir_type, self.channel,
                                    "_".join([charge_selection]),
                                    process_name_or_dummy)
        # Per-stage subdirectories shared by all samples
        for subdirectory in ["comp_jetToTauFakeRate", "makePlots"]:
            key_dir = getKey(subdirectory)
            for dir_type in [
                    DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT
            ]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, subdirectory)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, subdirectory)
        # Channel-wide top-level directories
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)

        # Create every directory on disk, logging coarse progress in percent
        numDirectories = 0
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                numDirectories += len(self.dirs[key])
            else:
                numDirectories += 1
        logging.info("Creating directory structure (numDirectories = %i)" %
                     numDirectories)
        numDirectories_created = 0
        frac = 1
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
                numDirectories_created += len(self.dirs[key])
            else:
                create_if_not_exists(self.dirs[key])
                numDirectories_created = numDirectories_created + 1
            # emit every whole percent that the running count has crossed
            while 100 * numDirectories_created >= frac * numDirectories:
                logging.info(" %i%% completed" % frac)
                frac = frac + 1
        logging.info("Done.")

        # Split the input files of each sample into per-job chunks
        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        # One analysis config per (sample, charge selection, shift, jobId)
        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue

            process_name = sample_info["process_name_specific"]
            inputFileList = inputFileLists[sample_name]

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            is_mc = (sample_info["type"] == "mc")
            sample_category = sample_info["sample_category"]

            for charge_selection in self.charge_selections:
                for central_or_shift in self.central_or_shifts:

                    # systematic shifts only apply to MC samples
                    if central_or_shift != "central" and not is_mc:
                        continue
                    if not self.accept_central_or_shift(
                            central_or_shift, sample_info):
                        continue

                    # build config files for executing analysis code
                    key_analyze_dir = getKey(process_name, charge_selection,
                                             central_or_shift)

                    for jobId in inputFileList.keys():

                        analyze_job_tuple = (process_name, charge_selection,
                                             central_or_shift, jobId)
                        key_analyze_job = getKey(*analyze_job_tuple)
                        ntupleFiles = inputFileList[jobId]
                        if len(ntupleFiles) == 0:
                            logging.warning(
                                "No input ntuples for %s --> skipping job !!" %
                                (key_analyze_job))
                            continue

                        cfgFile_modified_path = os.path.join(
                            self.dirs[key_analyze_dir][DKEY_CFGS],
                            "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                        logFile_path = os.path.join(
                            self.dirs[key_analyze_dir][DKEY_LOGS],
                            "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
                        histogramFile_path = os.path.join(
                            self.dirs[key_analyze_dir][DKEY_HIST],
                            "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)
                        # run-lumi-event output only if explicitly requested
                        rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                          if self.select_rle_output else ""

                        self.jobOptions_analyze[key_analyze_job] = {
                            'ntupleFiles': ntupleFiles,
                            'cfgFile_modified': cfgFile_modified_path,
                            'histogramFile': histogramFile_path,
                            'logFile': logFile_path,
                            'chargeSelection': charge_selection,
                            'jet_minPt': self.jet_minPt,
                            'jet_maxPt': self.jet_maxPt,
                            'jet_minAbsEta': self.jet_minAbsEta,
                            'jet_maxAbsEta': self.jet_maxAbsEta,
                            'hadTau_selection_tight':
                            self.hadTau_selection_tight,
                            'hadTauSelection_denominator':
                            self.hadTau_selection_denominator,
                            'hadTauSelections_numerator':
                            self.hadTau_selections_numerator,
                            'trigMatchingOptions': self.trigMatchingOptions,
                            'selEventsFileName_output': rleOutputFile_path,
                            'absEtaBins': self.absEtaBins,
                            'decayModes': self.decayModes,
                            'central_or_shift': central_or_shift,
                            'central_or_shifts_local': [],
                            'apply_hlt_filter': self.hlt_filter,
                        }
                        self.createCfg_analyze(
                            self.jobOptions_analyze[key_analyze_job],
                            sample_info)

                        # initialize input and output file names for hadd_stage1
                        key_hadd_stage1_dir = getKey(process_name,
                                                     charge_selection)
                        hadd_stage1_job_tuple = (process_name,
                                                 charge_selection)
                        key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                        if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[
                                key_hadd_stage1_job] = []
                        self.inputFiles_hadd_stage1[
                            key_hadd_stage1_job].append(
                                self.jobOptions_analyze[key_analyze_job]
                                ['histogramFile'])
                        self.outputFile_hadd_stage1[
                            key_hadd_stage1_job] = os.path.join(
                                self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                "hadd_stage1_%s_%s.root" %
                                hadd_stage1_job_tuple)

                # initialize input and output file names for hadd_stage2
                key_hadd_stage1_job = getKey(process_name, charge_selection)
                key_hadd_stage2_dir = getKey("hadd", charge_selection)
                key_hadd_stage2_job = getKey(charge_selection)
                if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(
                    self.outputFile_hadd_stage1[key_hadd_stage1_job])
                self.outputFile_hadd_stage2[
                    key_hadd_stage2_job] = os.path.join(
                        self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                        "hadd_stage2_%s.root" % charge_selection)

        # One fake-rate fit per (charge selection, trigger-matching option)
        logging.info(
            "Creating configuration files for executing 'comp_jetToTauFakeRate'"
        )
        for charge_selection in self.charge_selections:
            charge_key = "comp_%s" % charge_selection
            self.comp_input_files[charge_key] = []
            for trigMatchingOption in self.trigMatchingOptions:
                key_hadd_stage2_job = getKey(charge_selection)
                key_comp_jetToTauFakeRate_dir = getKey("comp_jetToTauFakeRate")
                key_comp_jetToTauFakeRate_job = getKey(charge_selection,
                                                       trigMatchingOption)
                self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job] = {
                        'inputFile':
                        self.outputFile_hadd_stage2[key_hadd_stage2_job],
                        'cfgFile_modified':
                        os.path.join(
                            self.dirs[DKEY_CFGS],
                            "comp_jetToTauFakeRate_%s_%s_cfg.py" %
                            (charge_selection, trigMatchingOption)),
                        'outputFile':
                        os.path.join(
                            self.dirs[DKEY_HIST],
                            "comp_jetToTauFakeRate_%s_%s.root" %
                            (charge_selection, trigMatchingOption)),
                        'logFile':
                        os.path.join(
                            self.dirs[DKEY_LOGS],
                            "comp_jetToTauFakeRate_%s_%s.log" %
                            (charge_selection, trigMatchingOption)),
                        # fake rate = numerator / denominator histogram dirs
                        'looseRegion':
                        "jetToTauFakeRate_%s_%s/denominator/" %
                        (charge_selection, trigMatchingOption),
                        'tightRegion':
                        "jetToTauFakeRate_%s_%s/numerator/" %
                        (charge_selection, trigMatchingOption),
                        'absEtaBins':
                        self.absEtaBins,
                        'ptBins':
                        self.ptBins,
                        'decayModes':
                        self.decayModes,
                        'hadTauSelections':
                        self.hadTau_selections_numerator,
                        'trigMatchingOption':
                        trigMatchingOption,
                        'plots_outputFileName':
                        os.path.join(
                            self.dirs[key_comp_jetToTauFakeRate_dir]
                            [DKEY_PLOT], "comp_jetToTauFakeRate_%s.png" %
                            trigMatchingOption)
                    }
                self.createCfg_comp_jetToTauFakeRate(
                    self.jobOptions_comp_jetToTauFakeRate[
                        key_comp_jetToTauFakeRate_job])
                comp_output = self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job]['outputFile']
                self.targets.append(comp_output)
                self.comp_input_files[charge_key].append(comp_output)
            self.comp_output_files[charge_key] = os.path.join(
                self.dirs[DKEY_HIST],
                "comp_jetToTauFakeRate_%s.root" % charge_selection)

        # Control plots: one overview job per charge selection, then one job
        # per (trigger option, |eta| bin) for the denominator, and one per
        # (trigger option, |eta| bin, numerator selection) for the numerator
        logging.info("Creating configuration files to run 'makePlots'")
        for charge_selection in self.charge_selections:
            key_hadd_stage2_job = getKey(charge_selection)
            key_makePlots_dir = getKey("makePlots")
            key_makePlots_job = getKey(charge_selection)
            self.jobOptions_make_plots[key_makePlots_job] = {
                'executable':
                self.executable_make_plots,
                'inputFile':
                self.outputFile_hadd_stage2[key_hadd_stage2_job],
                'cfgFile_modified':
                os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS],
                             "makePlots_%s_cfg.py" % self.channel),
                'outputFile':
                os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT],
                             "makePlots_%s.png" % self.channel),
                'histogramDir':
                "jetToTauFakeRate_%s" % charge_selection,
                'label':
                None,
                'make_plots_backgrounds':
                self.make_plots_backgrounds
            }
            self.createCfg_makePlots(
                self.jobOptions_make_plots[key_makePlots_job])
            for trigMatchingOption in self.trigMatchingOptions:
                # NOTE(review): cfgFile_make_plots is switched to the
                # denominator template for all the jobs below -- confirm
                # this is intended to stick for subsequent iterations
                self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
                for absEtaBin in ["absEtaLt1_5", "absEta1_5to9_9"]:
                    key_hadd_stage2_job = getKey(charge_selection)
                    key_makePlots_job = getKey(charge_selection,
                                               trigMatchingOption, absEtaBin,
                                               "denominator")
                    self.jobOptions_make_plots[key_makePlots_job] = {
                      'executable' : self.executable_make_plots,
                      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                      'cfgFile_modified' : os.path.join(
                        self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_%s_denominator_%s_cfg.py" % \
                          (self.channel, charge_selection, trigMatchingOption, absEtaBin)),
                      'outputFile' : os.path.join(
                        self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s_%s_denominator_%s.png" % (self.channel, charge_selection, trigMatchingOption, absEtaBin)),
                      'histogramDir' : "jetToTauFakeRate_%s_%s/denominator/%s" % (charge_selection, trigMatchingOption, absEtaBin),
                      'label' : None,
                      'make_plots_backgrounds' : self.make_plots_backgrounds
                    }
                    self.createCfg_makePlots(
                        self.jobOptions_make_plots[key_makePlots_job])
                    for hadTau_selection_numerator in self.hadTau_selections_numerator:
                        key_hadd_stage2_job = getKey(charge_selection)
                        key_makePlots_job = getKey(charge_selection,
                                                   trigMatchingOption,
                                                   absEtaBin, "numerator",
                                                   hadTau_selection_numerator)
                        self.jobOptions_make_plots[key_makePlots_job] = {
                          'executable' : self.executable_make_plots,
                          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                          'cfgFile_modified' : os.path.join(
                            self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_%s_numerator_%s_%s_cfg.py" % \
                              (self.channel, charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin)),
                          'outputFile' : os.path.join(
                            self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s_%s_numerator_%s_%s.png" % \
                              (self.channel, charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin)),
                          'histogramDir' : "jetToTauFakeRate_%s_%s/numerator/%s/%s" % (charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin),
                          'label' : None,
                          'make_plots_backgrounds' : self.make_plots_backgrounds
                        }
                        self.createCfg_makePlots(
                            self.jobOptions_make_plots[key_makePlots_job])

        # Batch-submission scripts (only when running via sbatch)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.sbatchFile_comp_jetToTauFakeRate = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_comp_jetToTauFakeRate)
            self.createScript_sbatch(self.executable_comp_jetToTauFakeRate,
                                     self.sbatchFile_comp_jetToTauFakeRate,
                                     self.jobOptions_comp_jetToTauFakeRate)

        # Assemble the Makefile that chains all stages together
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile,
                                       make_dependency="phony_hadd_stage1",
                                       max_mem='4096M')
        self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
        self.addToMakefile_comp_hadd(lines_makefile)
        self.addToMakefile_make_plots(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done.")

        return self.num_jobs
  def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

    Builds the directory layout, writes the per-job analysis configs, fills the
    hadd stage1/stage2 bookkeeping, writes the prepareDatacards and post-fit
    configs, optionally creates the sbatch submission script, and finally
    writes the Makefile driving the whole workflow.

    Returns:
      int: total number of jobs created (self.num_jobs)
    """

    # ------------------------------------------------------------------
    # Build the dictionary of config/output directories (self.dirs) for
    # every (process, lepton selection, systematic shift) combination.
    # ------------------------------------------------------------------
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      process_name = sample_info["process_name_specific"]
      is_mc = (sample_info["type"] == "mc")

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
        central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
        for central_or_shift_or_dummy in central_or_shifts_extended:
          process_name_extended = [ process_name, "hadd" ]
          for process_name_or_dummy in process_name_extended:
            # skip the meaningless dummy-x-dummy combinations
            if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
              continue
            if central_or_shift_or_dummy != "central" and central_or_shift_or_dummy not in central_or_shift_extensions:
              if not is_mc:
                continue  # systematic shifts are defined for MC only
              if not self.accept_central_or_shift(central_or_shift_or_dummy, sample_info):
                continue

            key_dir = getKey(process_name_or_dummy, lepton_selection, central_or_shift_or_dummy)
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
              initDict(self.dirs, [ key_dir, dir_type ])
              # configs & logs live under configDir; histograms & RLE files under outputDir
              if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                  lepton_selection, process_name_or_dummy, central_or_shift_or_dummy)
              else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                  lepton_selection, process_name_or_dummy)
    # per-subdirectory entries (currently only 'prepareDatacards')
    for subdirectory in [ "prepareDatacards" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    # top-level (channel-wide) entries
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_COMBINE_OUTPUT ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_COMBINE_OUTPUT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    # ------------------------------------------------------------------
    # Create the directory structure on disk, logging percent progress.
    # ------------------------------------------------------------------
    numDirectories = 0
    for key in self.dirs.keys():
      if isinstance(self.dirs[key], dict):
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0
    frac = 1
    for key in self.dirs.keys():
      if isinstance(self.dirs[key], dict):
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created += 1
      # guard against numDirectories == 0, which would otherwise loop forever
      while numDirectories > 0 and 100 * numDirectories_created >= frac * numDirectories:
        logging.info(" %i%% completed" % frac)
        frac += 1
    logging.info("Done.")

    # ------------------------------------------------------------------
    # Collect the lists of input Ntuple files, split into jobs.
    # ------------------------------------------------------------------
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    # ------------------------------------------------------------------
    # Build the analysis job configs and the hadd stage1/stage2 maps.
    # ------------------------------------------------------------------
    for lepton_selection in self.lepton_selections:
      for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
          continue
        process_name = sample_info["process_name_specific"]

        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
        is_mc = (sample_info["type"] == "mc")
        inputFileList = inputFileLists[sample_name]
        for central_or_shift in self.central_or_shifts:
          if central_or_shift != "central" and not is_mc:
            continue  # only MC gets systematic variations

          # build config files for executing analysis code
          key_analyze_dir = getKey(process_name, lepton_selection, central_or_shift)

          for jobId in inputFileList.keys():
            analyze_job_tuple = (process_name, lepton_selection, central_or_shift, jobId)
            key_analyze_job = getKey(*analyze_job_tuple)
            ntupleFiles = inputFileList[jobId]
            if len(ntupleFiles) == 0:
              logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
              continue

            cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
            logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
            rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                 if self.select_rle_output else ""
            histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)

            self.jobOptions_analyze[key_analyze_job] = {
              'ntupleFiles'              : ntupleFiles,
              'cfgFile_modified'         : cfgFile_modified_path,
              'histogramFile'            : histogramFile_path,
              'logFile'                  : logFile_path,
              'selEventsFileName_output' : rleOutputFile_path,
              'leptonSelection'          : lepton_selection,
              'applyFakeRateWeights'     : "disabled",
              'central_or_shift'         : central_or_shift,
            }
            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info)

            # initialize input and output file names for hadd_stage1
            key_hadd_stage1_dir = getKey(process_name, lepton_selection)
            hadd_stage1_job_tuple = (process_name, lepton_selection)
            key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
            if key_hadd_stage1_job not in self.inputFiles_hadd_stage1:
              self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
            self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
            self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                            "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)

        # initialize input and output file names for hadd_stage2
        key_hadd_stage1_job = getKey(process_name, lepton_selection)
        key_hadd_stage2_dir = getKey("hadd", lepton_selection)
        key_hadd_stage2_job = getKey(lepton_selection)
        if key_hadd_stage2_job not in self.inputFiles_hadd_stage2:
          self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
        self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                        "hadd_stage2_%s.root" % lepton_selection)

    # ------------------------------------------------------------------
    # prepareDatacards + post-fit configs, one per histogram to fit.
    # ------------------------------------------------------------------
    logging.info("Creating configuration files to run 'prepareDatacards'")
    # take shallow copies so later mutations do not alias the originals
    self.prep_dcard_processesToCopy = list(self.prep_dcard_processesToCopy)
    self.prep_dcard_signals = list(self.prep_dcard_signals)
    for histogramToFit in self.histograms_to_fit:
      # NOTE(review): hard-coded "Tight" assumes "Tight" is among self.lepton_selections -- confirm
      key_hadd_stage2_job = getKey("Tight")
      key_prep_dcard_dir = getKey("prepareDatacards")
      prep_dcard_job_tuple = (self.channel, histogramToFit)
      key_prep_dcard_job = getKey(histogramToFit)
      datacardFile = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple)
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
        'datacardFile' : datacardFile,
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # post-fit Makefile options; deep-copied so per-histogram edits don't leak
      jobOptions_makefile = copy.deepcopy(self.jobOptions_postFit)
      jobOptions_makefile['fit_result'] = os.path.join(
        self.dirs[DKEY_COMBINE_OUTPUT], 'fit_{}'.format(histogramToFit), jobOptions_makefile['target']
      )
      jobOptions_makefile['hadd_stage2'] = self.outputFile_hadd_stage2[key_hadd_stage2_job]
      jobOptions_makefile['prepare_datacard'] = datacardFile
      jobOptions_makefile['data_datacard'] = os.path.join(
        self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_data_%s_%s.root" % prep_dcard_job_tuple
      )
      jobOptions_makefile['pseudodata_datacard'] = os.path.join(
        self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_pseudodata_%s_%s.root" % prep_dcard_job_tuple
      )
      jobOptions_makefile['makefile'] = os.path.join(
        self.dirs[DKEY_COMBINE_OUTPUT], 'Makefile_{}'.format(histogramToFit)
      )
      jobOptions_makefile['stdout'] = os.path.join(
        self.dirs[DKEY_COMBINE_OUTPUT], 'stdout_{}.log'.format(histogramToFit)
      )
      self.createCfg_postFit(jobOptions_makefile)

    # ------------------------------------------------------------------
    # Batch-submission script and Makefile.
    # ------------------------------------------------------------------
    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile, make_dependency = "phony_hadd_stage1")
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_postFit(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
예제 #26
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")
      process_name = sample_info["process_name_specific"]

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
            continue

          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          for chargeSumSelection in self.chargeSumSelections:
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                      central_or_shift_or_dummy, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info
                    ):
                  continue

                key_dir = getKey(process_name_or_dummy, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel,
                      "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")   

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e|t)_wFakeRateWeights')
    for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
      lepton_selection = lepton_and_hadTau_selection
      hadTau_selection = lepton_and_hadTau_selection
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      if self.applyFakeRateWeights == "3tau":
        lepton_selection = "Tight"
      hadTau_selection = "|".join([ hadTau_selection, self.hadTau_selection_part2 ])

      if lepton_and_hadTau_selection == "forBDTtraining":
        lepton_selection = "Loose"
        electron_selection = lepton_selection
        muon_selection = lepton_selection
        hadTau_selection = "Tight|%s" % self.hadTau_selection_relaxed
      elif lepton_and_hadTau_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
        hadTau_selection = "Tight"
        hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2])
      elif lepton_and_hadTau_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"
        hadTau_selection = "Tight"
        hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2])
      elif lepton_and_hadTau_selection == "Fakeable_mcClosure_t":
        electron_selection = "Tight"
        muon_selection = "Tight"
        hadTau_selection = "Fakeable"
        hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2])

      for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
        if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
          continue
        if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)

        for chargeSumSelection in self.chargeSumSelections:

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_and_hadTau_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift)

              for jobId in inputFileList.keys():

                analyze_job_tuple = (process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                applyFakeRateWeights = self.applyFakeRateWeights \
                  if not (lepton_selection == "Tight" and hadTau_selection.find("Tight") != -1) \
                  else "disabled"

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'hadTauSelection'          : hadTau_selection,
                  'apply_hadTauGenMatching'  : self.apply_hadTauGenMatching,
                  'chargeSumSelection'       : chargeSumSelection,
                  'applyFakeRateWeights'     : applyFakeRateWeights,
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'useNonNominal'            : self.use_nonnominal,
                  'fillGenEvtHistograms'     : True,
                  'selectBDT'                : self.isBDTtraining,
                  'gen_mHH'                  : self.gen_mHH,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_and_hadTau_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
                hadd_stage1_job_tuple = (process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.isBDTtraining:
              continue

            # add output files of hadd_stage1 to list of input files for hadd_stage1_5
            key_hadd_stage1_job = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
            key_hadd_stage1_5_dir = getKey("hadd", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
            hadd_stage1_5_job_tuple = (lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
            key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

          if self.isBDTtraining:
            continue

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_fake, TTW_fake, TTWW_fake, ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          sample_categories = self.get_sample_categories()
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(lepton_selection, hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum conversion background contributions for the total of all MC sample
          # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          processes_input = []
          for sample_category in self.convs_backgrounds:
            processes_input.append("%s_Convs" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(lepton_selection, hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          if self.isBDTtraining:
            continue

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          key_hadd_stage2_dir = getKey("hadd", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          hadd_stage2_job_tuple = (lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_and_hadTau_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])          
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    if self.isBDTtraining:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_analyze(lines_makefile)
      self.addToMakefile_hadd_stage1(lines_makefile)
      self.targets.extend(self.phoniesToAdd)
      self.addToMakefile_validate(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for chargeSumSelection in self.chargeSumSelections:
      key_hadd_stage1_5_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled"), chargeSumSelection)
      key_addFakes_dir = getKey("addBackgroundLeptonFakes")
      key_addFakes_job = getKey("data_fakes", chargeSumSelection)      
      category_sideband = None
      if self.applyFakeRateWeights == "4L":
        category_sideband = "hh_1l_3tau_%s_Fakeable_wFakeRateWeights" % chargeSumSelection
      elif self.applyFakeRateWeights == "3tau":
        category_sideband = "hh_1l_3tau_%s_Fakeable_wFakeRateWeights" % chargeSumSelection
      else:
        raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % self.applyFakeRateWeights)
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % chargeSumSelection),
        'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % chargeSumSelection),
        'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % chargeSumSelection),
        'category_signal' : "hh_1l_3tau_%s_Tight" % chargeSumSelection,
        'category_sideband' : category_sideband
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), chargeSumSelection)
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'...")
    for histogramToFit in self.histograms_to_fit:
      logging.info(" ...  for histogram %s" % histogramToFit)
      prep_dcard_HH = set()
      for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
          continue
        sample_category = sample_info["sample_category"]
        if sample_category.startswith("signal"):
          sample_category = sample_info["sample_category_hh"]
          doAdd = False
          if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit:
            if ("SM" in histogramToFit or any(nonresPoint in histogramToFit for nonresPoint in NONRESONANT_KEYS)) and 'nonresonant' in sample_category:
              doAdd = True
            if ("spin0" in histogramToFit and "spin0" in sample_category) or ("spin2" in histogramToFit and "spin2" in sample_category):
              startpos = None
              for pattern in [ "MVAOutput", "BDTOutput" ]:
                if pattern in histogramToFit:
                  startpos = histogramToFit.find(pattern) + len(pattern) + 1 # CV: increment startpos by 1 to account for trailing "_"
              if not startpos:
                raise ValueError("Failed to parse histogram name = '%s' !!" % histogramToFit) 
              endpos = histogramToFit.find("_", startpos)
              masspoint = histogramToFit[startpos:endpos]
              if ("_%s_" % masspoint) in sample_category:
                doAdd = True
          else:
            doAdd = True
          if doAdd:
            if "_wwww" in sample_category:
              prep_dcard_HH.add(sample_category.replace("_wwww", "_zzzz"))
              prep_dcard_HH.add(sample_category.replace("_wwww", "_wwww"))
              prep_dcard_HH.add(sample_category.replace("_wwww", "_zzww"))
              if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit):
                prep_dcard_HH.add(sample_category.replace("_wwww", ""))
            elif "_wwtt" in sample_category:
              prep_dcard_HH.add(sample_category.replace("_wwtt", "_ttzz"))
              prep_dcard_HH.add(sample_category.replace("_wwtt", "_ttww"))
              if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit):
                prep_dcard_HH.add(sample_category.replace("_wwtt", ""))
            elif "_tttt" in sample_category:
              prep_dcard_HH.add(sample_category)
              if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit):
                prep_dcard_HH.add(sample_category.replace("_tttt", ""))
            else:
              raise ValueError("Failed to identify relevant HH decay mode(s) for 'sample_category' = %s !!" % sample_category)
      prep_dcard_HH = list(prep_dcard_HH)
      prep_dcard_H = []
      prep_dcard_other_nonfake_backgrounds = []
      for process in self.nonfake_backgrounds:
        if process in [ "VH", "WH", "ZH", "TH", "tHq", "tHW", "TTH", "TTWH", "TTZH", "ggH", "qqH" ]:
          prep_dcard_H.append("%s_hww" % process)
          prep_dcard_H.append("%s_hzz" % process)
          prep_dcard_H.append("%s_htt" % process)
          prep_dcard_H.append("%s_hbb" % process)
        else:
          prep_dcard_other_nonfake_backgrounds.append(process)
      self.prep_dcard_processesToCopy = [ "data_obs" ] + prep_dcard_HH + prep_dcard_H + prep_dcard_other_nonfake_backgrounds + [ "Convs", "data_fakes", "fakes_mc" ]
      key_prep_dcard_dir = getKey("prepareDatacards")
      if "OS" in self.chargeSumSelections:
        key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
        prep_dcard_job_tuple = (self.channel, "OS", histogramToFit)
        key_prep_dcard_job = getKey("OS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard,
          'histogramToFit' : histogramToFit,
          'label' : None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
      if "SS" in self.chargeSumSelections:
        key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS")
        prep_dcard_job_tuple = (self.channel, "SS", histogramToFit)
        key_prep_dcard_job = getKey("SS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard_SS,
          'histogramToFit' : histogramToFit,
          'label' : 'SS'
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      #  - 'CMS_ttHl_Clos_norm_t'
      #  - 'CMS_ttHl_Clos_shape_t'
      for chargeSumSelection in self.chargeSumSelections:
        key_prep_dcard_job = getKey(chargeSumSelection, histogramToFit)
        key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), chargeSumSelection)
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, chargeSumSelection, histogramToFit)
        key_add_syst_fakerate_job = getKey(chargeSumSelection, histogramToFit)
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
          'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
          'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
          'category' : self.channel,
          'histogramToFit' : histogramToFit,
          'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = None
        if chargeSumSelection == "OS":
          histogramDir_nominal = "%s/sel/evt/fakes_mc" % self.histogramDir_prep_dcard
        elif chargeSumSelection == "SS":
          histogramDir_nominal = "%s/sel/evt/fakes_mc" % self.histogramDir_prep_dcard_SS
        else:
          raise ValueError("Invalid parameter 'chargeSumSelection' = %s !!" % chargeSumSelection)
        for lepton_and_hadTau_type in [ 'e', 'm', 't' ]:
          lepton_and_hadTau_mcClosure = "Fakeable_mcClosure_%s" % lepton_and_hadTau_type
          if lepton_and_hadTau_mcClosure not in self.lepton_and_hadTau_selections:
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_mcClosure, "enabled")
          key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          histogramDir_mcClosure = "%s/sel/evt/fakes_mc" % self.mcClosure_dir['%s_%s' % (lepton_and_hadTau_mcClosure, chargeSumSelection)]
          if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit:
            histogramDir_nominal = histogramDir_nominal.replace("/sel/evt", "/sel/datacard")
            histogramDir_mcClosure = histogramDir_mcClosure.replace("/sel/evt", "/sel/datacard")
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
            'add_Clos_%s' % lepton_and_hadTau_type : ("Fakeable_mcClosure_%s" % lepton_and_hadTau_type) in self.lepton_and_hadTau_selections,
            'inputFile_nominal_%s' % lepton_and_hadTau_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'histogramName_nominal_%s' % lepton_and_hadTau_type : "%s/%s" % (histogramDir_nominal, histogramToFit),
            'inputFile_mcClosure_%s' % lepton_and_hadTau_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
            'histogramName_mcClosure_%s' % lepton_and_hadTau_type : "%s/%s" % (histogramDir_mcClosure, histogramToFit)
          })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_dir = getKey("makePlots")
    if "OS" in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : '1l+3#tau_{h}',
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS")
      key_makePlots_job = getKey("SS")      
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard_SS,
        'label' : "1l+3#tau_{h} SS",
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections: #TODO
      key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("Fakeable_mcClosure", "OS")      
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
예제 #27
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

    The stages produced here are:
      1. per-sample/per-systematic 'analyze' configs,
      2. 'addBackgrounds' configs (non-fake per process, fake per process, fakes_mc total),
      3. 'addBackgroundFakes' configs (data-driven fake estimate),
      4. 'prepareDatacards' and 'makePlots' configs,
      5. an sbatch submission script (batch mode only) and the Makefile tying all stages together.
    """

    # make sure every configured output/config directory exists before any file is written;
    # self.dirs may map a key either to a single path or to a dict of per-type paths
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
      else:
        create_if_not_exists(self.dirs[key])
  
    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
      # skip disabled samples and categories handled by dedicated workflows
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue

      process_name = sample_info["process_name_specific"]

      logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))  

      is_mc = (sample_info["type"] == "mc")
      # scale MC to the target integrated luminosity; data keeps unit weight
      lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"]
      apply_genWeight = sample_info["apply_genWeight"] if (is_mc and "apply_genWeight" in sample_info.keys()) else False
      sample_category = sample_info["sample_category"]
      triggers = sample_info["triggers"]
      # trigger bits are applied to all data, and to MC only for 2015 or re-HLT'ed 2016 samples
      apply_trigger_bits = (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        lepton_selection = lepton_and_hadTau_selection
        # in "2tau" fake-rate mode only the taus are loosened; leptons stay Tight
        if self.applyFakeRateWeights == "2tau":
          lepton_selection = "Tight"
        hadTau_selection = "|".join([ lepton_and_hadTau_selection, self.hadTau_selection_part2 ])        
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          # fake-rate weights only make sense for Fakeable selections
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          # the MC-closure selection is only run with fake-rate weights enabled
          if lepton_and_hadTau_selection == "Fakeable_mcClosure" and not lepton_and_hadTau_frWeight == "enabled":
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          for hadTau_charge_selection in self.hadTau_charge_selections:
            for central_or_shift in self.central_or_shifts:

              inputFileList = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)
              for jobId in inputFileList.keys():
                # systematic shifts are evaluated only in the Tight OS signal region ...
                if central_or_shift != "central" and not (lepton_and_hadTau_selection.startswith("Tight") and hadTau_charge_selection == "OS"):
                  continue
                # ... and only for MC
                if central_or_shift != "central" and not is_mc:
                  continue
                # theory-shape uncertainties apply only to their matching process
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
                  continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
                  continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
                  continue

                key_dir = getKey(sample_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
                key_file = getKey(sample_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection, central_or_shift, jobId)

                self.ntupleFiles[key_file] = inputFileList[jobId]
                if len(self.ntupleFiles[key_file]) == 0:
                  print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_file, self.ntupleFiles[key_file])
                  continue
                # per-job file names: config, output histograms, log, and (optional) run:lumi:event dump
                self.cfgFiles_analyze_modified[key_file] = os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%s_%i_cfg.py" % \
                  (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId))
                self.histogramFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%s_%i.root" % \
                  (process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId))
                self.logFiles_analyze[key_file] = os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%s_%i.log" % \
                  (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId))
                self.rleOutputFiles[key_file] = os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%s_%i.txt" % \
                  (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)) if self.select_rle_output else ""

                # fake-rate weighting is switched off entirely when this pass runs without FR weights
                applyFakeRateWeights = self.applyFakeRateWeights
                if lepton_and_hadTau_frWeight == "disabled":
                  applyFakeRateWeights = "disabled"
                self.createCfg_analyze(self.ntupleFiles[key_file], self.histogramFiles[key_file], sample_category, self.era, triggers,
                  lepton_selection, self.apply_leptonGenMatching, hadTau_selection, self.apply_hadTauGenMatching, hadTau_charge_selection,
                  applyFakeRateWeights, is_mc, central_or_shift, lumi_scale, apply_genWeight, apply_trigger_bits, self.cfgFiles_analyze_modified[key_file],
                  self.rleOutputFiles[key_file])
                
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch()

    logging.info("Creating configuration files for executing 'addBackgrounds'")  
    process_names = []
    process_names.extend(self.nonfake_backgrounds)
    process_names.extend([ "signal", "ttH_htt", "ttH_hww", "ttH_hzz" ])
    # sum non-fake contributions for each MC sample separately
    # input processes: TT2t0e0m0j, TT1t1e0m0j, TT1t0e1m0j", TT0t2e0m0j, TT0t1e1m0j, TT0t0e2m0j; TTW2t0e0m0j,...
    # output processes: TT; ...
    for process_name in process_names:
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)          
          for hadTau_charge_selection in self.hadTau_charge_selections:
            key = getKey(process_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
            # same selection/FR-weight validity filters as in the analyze loop above
            if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
              continue
            if lepton_and_hadTau_selection == "Fakeable_mcClosure" and not lepton_and_hadTau_frWeight == "enabled":
              continue
            self.histogramFile_addBackgrounds[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgrounds_%s_%s_%s_%s.root" % \
              (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))        
            self.cfgFile_addBackgrounds_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgrounds_%s_%s_%s_%s_cfg.py" % \
              (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))
            histogramDir = getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
            # gen-matched (non-fake) sub-processes to be merged into a single output process
            processes_input = [ "%s%s" % (process_name, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ]
            # CV: treat fakes in ttH signal events as "signal", not as "background"
            ##if process_name in [ "signal", "ttH_htt", "ttH_hww", "ttH_hzz" ]:
            ##  processes_input.extend([ "%s%s" % (process_name, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
            self.process_output_addBackgrounds[key] = process_name
            self.createCfg_addBackgrounds(self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key], self.cfgFile_addBackgrounds_modified[key],
              [ histogramDir ], processes_input, self.process_output_addBackgrounds[key])
    # sum fake contributions for each MC sample separately
    # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
    # output processes: fakes_TT; ...
    for process_name in process_names:
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)          
          for hadTau_charge_selection in self.hadTau_charge_selections:
            # keys are prefixed with "fakes_" so they do not collide with the non-fake pass above
            key = getKey("fakes_%s" % process_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
            if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
              continue
            if lepton_and_hadTau_selection == "Fakeable_mcClosure" and not lepton_and_hadTau_frWeight == "enabled":
              continue
            self.histogramFile_addBackgrounds[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgrounds_%s_fakes_%s_%s_%s.root" % \
              (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))        
            self.cfgFile_addBackgrounds_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgrounds_%s_fakes_%s_%s_%s_cfg.py" % \
              (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))
            histogramDir = getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
            processes_input = [ "%s%s" % (process_name, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ]
            self.process_output_addBackgrounds[key] = "fakes_%s" % process_name
            self.createCfg_addBackgrounds(self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key], self.cfgFile_addBackgrounds_modified[key],
              [ histogramDir ], processes_input, self.process_output_addBackgrounds[key])        
    # sum fake contributions for the total of all MC sample
    # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
    # output process: fakes_mc
    for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
      for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
        if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
          continue
        if lepton_and_hadTau_selection == "Fakeable_mcClosure" and not lepton_and_hadTau_frWeight == "enabled":
          continue
        lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
        for hadTau_charge_selection in self.hadTau_charge_selections:
          key = getKey(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
          self.histogramFile_addBackgrounds[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgrounds_%s_fakes_mc_%s_%s.root" % \
            (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))
          self.cfgFile_addBackgrounds_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgrounds_%s_fakes_mc_%s_%s_cfg.py" % \
            (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))
          histogramDir = getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)
          # collect the fake-matched sub-processes of every non-fake background into one list
          processes_input = []
          for process_name in self.nonfake_backgrounds:
            for genMatch in self.lepton_and_hadTau_genMatches_fakes:
              processes_input.append("%s%s" % (process_name, genMatch))
          self.process_output_addBackgrounds[key] = "fakes_mc"
          self.createCfg_addBackgrounds(self.histogramFile_hadd_stage1, self.histogramFile_addBackgrounds[key], self.cfgFile_addBackgrounds_modified[key],
            [ histogramDir ], processes_input, self.process_output_addBackgrounds[key])
        
    logging.info("Creating configuration files for executing 'addBackgroundFakes'")
    # data-driven fake estimate: extrapolate from the Fakeable sideband into the Tight signal region
    for hadTau_charge_selection in self.hadTau_charge_selections:
      key = getKey("fakes_data", hadTau_charge_selection) 
      self.histogramFile_addFakes[key] = os.path.join(self.outputDir, DKEY_HIST, "addBackgroundFakes_%s_%s.root" % \
        (self.channel, hadTau_charge_selection))
      self.cfgFile_addFakes_modified[key] = os.path.join(self.outputDir, DKEY_CFGS, "addBackgroundFakes_%s_%s_cfg.py" % \
        (self.channel, hadTau_charge_selection))
      category_signal = "1l_2tau_%s_Tight" % hadTau_charge_selection
      category_sideband = "1l_2tau_%s_Fakeable_wFakeRateWeights" % hadTau_charge_selection
      self.createCfg_addFakes(self.histogramFile_hadd_stage1_5, self.histogramFile_addFakes[key], self.cfgFile_addFakes_modified[key],
        category_signal, category_sideband)  

    logging.info("Creating configuration files for executing 'prepareDatacards'")
    # one datacard per fitted histogram; an extra SS-region card when the SS selection is enabled
    for histogramToFit in self.histograms_to_fit:
      self.createCfg_prep_dcard(histogramToFit)
      if "SS" in self.hadTau_charge_selections:
        self.createCfg_prep_dcard(histogramToFit, self.histogramDir_prep_dcard_SS, "SS")

    logging.info("Creating configuration files for executing 'makePlots'")
    self.createCfg_makePlots()
    if "SS" in self.hadTau_charge_selections:
      self.createCfg_makePlots(self.histogramDir_prep_dcard_SS, "SS")
    if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections:
      self.createCfg_makePlots_mcClosure()  
      
    logging.info("Creating Makefile")
    # the Makefile chains all stages: analyze -> hadd -> backgrounds -> hadd -> datacards -> plots
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_make_plots_mcClosure(lines_makefile)   
    self.createMakefile(lines_makefile)
  
    logging.info("Done")
예제 #28
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, "",
                        process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, "",
                        process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_DCRD, DKEY_PLOT,
                DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HADD_RT]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        ##print "self.dirs = ", self.dirs

        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            is_signal = (sample_category == "signal")

            for central_or_shift in self.central_or_shifts:

                inputFileList = inputFileLists[sample_name]
                for jobId in inputFileList.keys():
                    if central_or_shift != "central" and not is_mc:
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttH"
                    ) and sample_category != "signal":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttW"
                    ) and sample_category != "TTW":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttZ"
                    ) and sample_category != "TTZ":
                        continue

                    # build config files for executing analysis code
                    key_dir = getKey(process_name)
                    key_analyze_job = getKey(process_name, central_or_shift,
                                             jobId)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                            key_job, ntupleFiles)
                        continue
                    self.jobOptions_analyze[key_analyze_job] = {
                      'ntupleFiles' : ntupleFiles,
                      'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % \
                         (self.channel, process_name, central_or_shift, jobId)),
                      'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%i.root" % \
                         (process_name, central_or_shift, jobId)),
                      'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % \
                         (self.channel, process_name, central_or_shift, jobId)),
                      'rleOutputFile' : os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % \
                         (self.channel, process_name, central_or_shift, jobId)) if self.select_rle_output else "",
                      'sample_category' : sample_category,
                      'triggers' : sample_info["triggers"],
                      'hadTau_selection' : self.hadTau_selection_part2,
                      ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
                      'use_HIP_mitigation_mediumMuonId' : True,
                      'is_mc' : is_mc,
                      'central_or_shift' : central_or_shift,
                      'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                      'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                      'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job])

                    # initialize input and output file names for hadd_stage1
                    key_hadd_stage1 = getKey(process_name)
                    if not key_hadd_stage1 in self.inputFiles_hadd_stage1.keys(
                    ):
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                        self.jobOptions_analyze[key_analyze_job]
                        ['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s.root" % \
                      (self.channel, process_name))

            # initialize input and output file names for hadd_stage2
            key_hadd_stage1 = getKey(process_name)
            key_hadd_stage2 = getKey("all")
            if not key_hadd_stage2 in self.inputFiles_hadd_stage2.keys():
                self.inputFiles_hadd_stage2[key_hadd_stage2] = []
            self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                self.outputFile_hadd_stage1[key_hadd_stage1])
            self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % \
              (self.channel))

        logging.info("Creating configuration files to run 'prepareDatacards'")
        for evtSelection in self.evtSelections:
            for histogramToFit in self.histograms_to_fit:
                key_prep_dcard_job = getKey(evtSelection, histogramToFit)
                key_hadd_stage2 = getKey("all")
                self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "prepareDatacards_%s_%s_%s_cfg.py" %
                        (self.channel, evtSelection, histogramToFit)),
                    'datacardFile':
                    os.path.join(
                        self.dirs[DKEY_DCRD],
                        "prepareDatacards_%s_%s_%s.root" %
                        (self.channel, evtSelection, histogramToFit)),
                    'histogramDir':
                    "_".join([self.histogramDir_prep_dcard, evtSelection]),
                    'histogramToFit':
                    histogramToFit,
                    'label':
                    None
                }
                self.createCfg_prep_dcard(
                    self.jobOptions_prep_dcard[key_prep_dcard_job])

        logging.info("Creating configuration files to run 'makePlots'")
        for evtSelection in self.evtSelections:
            key_makePlots_job = getKey(evtSelection)
            key_hadd_stage2 = getKey("all")
            self.jobOptions_make_plots[key_makePlots_job] = {
                'executable':
                self.executable_make_plots,
                'inputFile':
                self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified':
                os.path.join(
                    self.dirs[DKEY_CFGS],
                    "makePlots_%s_%s_cfg.py" % (self.channel, evtSelection)),
                'outputFile':
                os.path.join(
                    self.dirs[DKEY_PLOT],
                    "makePlots_%s_%s.png" % (self.channel, evtSelection)),
                'histogramDir':
                "_".join([self.histogramDir_prep_dcard, evtSelection]),
                'label':
                evtSelection,
                'make_plots_backgrounds':
                self.make_plots_backgrounds
            }
            self.createCfg_makePlots(
                self.jobOptions_make_plots[key_makePlots_job])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.addToMakefile_prep_dcard(lines_makefile)
        self.addToMakefile_make_plots(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
예제 #29
0
  def __init__(self, outputDir, executable_analyze, cfgFile_analyze_original, samples, hadTau_selection, hadTau_charge_selections, applyFakeRateWeights, central_or_shifts,
               max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
               executable_addBackgrounds, executable_addBackgroundJetToTauFakes, histograms_to_fit, select_rle_output = False, executable_prep_dcard="prepareDatacard"):
    """Configure the workflow for the '1l_2tau' analysis channel.

    Generic bookkeeping (systematics, luminosity, batch settings, ...) is
    delegated to analyzeConfig.__init__; this constructor then sets up what is
    specific to the 1l_2tau channel: selection categories, gen-matching strings
    used to separate non-fake from fake contributions, fake-background
    executables, and per-sample output directories.

    Parameters beyond those forwarded to analyzeConfig.__init__:
      cfgFile_analyze_original: template cfg file for the analysis jobs,
        resolved relative to self.workingDir (presumably set by the base
        class -- confirm)
      hadTau_selection: tau selection applied as 'hadTau_selection_part2'
      hadTau_charge_selections: tau-pair charge categories to process
      applyFakeRateWeights: "3L" or "2tau"; any other value raises ValueError
      executable_addBackgrounds, executable_addBackgroundJetToTauFakes:
        executables for the background-estimation stages
      select_rle_output: if True, run/lumi/event output files are produced
      executable_prep_dcard: not referenced in this constructor -- TODO confirm
        whether it is intentionally unused
    """
    analyzeConfig.__init__(self, outputDir, executable_analyze, "1l_2tau", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
      histograms_to_fit)

    self.samples = samples

    # Selection categories and fake-rate weight modes considered per sample.
    self.lepton_and_hadTau_selections = [ "Tight", "Fakeable", "Fakeable_mcClosure" ]
    self.lepton_and_hadTau_frWeights = [ "enabled", "disabled" ]
    self.hadTau_selection_part2 = hadTau_selection
    self.hadTau_charge_selections = hadTau_charge_selections
    self.applyFakeRateWeights = applyFakeRateWeights

    # Gen-matching categories; the strings apparently encode multiplicities of
    # gen-matched leptons/taus/electrons/muons/jets -- confirm exact convention.
    self.lepton_genMatches = [ "1l0j", "0l1j" ]
    self.hadTau_genMatches = [ "2t0e0m0j", "1t1e0m0j", "1t0e1m0j", "1t0e0m1j", "0t2e0m0j", "0t1e1m0j", "0t1e0m1j", "0t0e2m0j", "0t0e1m1j", "0t0e0m2j" ]

    # Build the lists of non-fake vs fake gen-match combinations.  A combination
    # counts as "non-fake" only when no object is matched to a jet, i.e. every
    # component string ends in "0j"; everything else goes into the fakes list.
    self.apply_leptonGenMatching = None
    self.apply_hadTauGenMatching = None
    self.lepton_and_hadTau_genMatches_nonfakes = []
    self.lepton_and_hadTau_genMatches_fakes = []
    if self.applyFakeRateWeights == "3L":
      # Fake-rate weights for lepton and taus: gen-match both kinds of object.
      self.apply_leptonGenMatching = True
      self.apply_hadTauGenMatching = True
      for lepton_genMatch in self.lepton_genMatches:
        for hadTau_genMatch in self.hadTau_genMatches:
          lepton_and_hadTau_genMatch = "&".join([ lepton_genMatch, hadTau_genMatch ])
          if lepton_genMatch.endswith("0j") and hadTau_genMatch.endswith("0j"):
            self.lepton_and_hadTau_genMatches_nonfakes.append(lepton_and_hadTau_genMatch)
          else:
            self.lepton_and_hadTau_genMatches_fakes.append(lepton_and_hadTau_genMatch)
    elif applyFakeRateWeights == "2tau":
      # Fake-rate weights for the taus only: gen-match taus but not the lepton.
      self.apply_leptonGenMatching = False
      self.apply_hadTauGenMatching = True
      for hadTau_genMatch in self.hadTau_genMatches:
        if hadTau_genMatch.endswith("0j"):
          self.lepton_and_hadTau_genMatches_nonfakes.append(hadTau_genMatch)
        else:
          self.lepton_and_hadTau_genMatches_fakes.append(hadTau_genMatch)
    else:
      raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % applyFakeRateWeights)

    self.executable_addBackgrounds = executable_addBackgrounds
    self.executable_addFakes = executable_addBackgroundJetToTauFakes

    # Book one directory per (sample, selection, frWeight, charge selection)
    # for cfg/histogram/log/rle files; skipped samples get no directories.
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          # Fake-rate weighting only applies to the "Fakeable*" selections.
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          for hadTau_charge_selection in self.hadTau_charge_selections:
            key_dir = getKey(sample_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)  
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
              initDict(self.dirs, [ key_dir, dir_type ])
              self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                "_".join([ lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection ]), process_name)
    # Channel-level directories for datacards and plots.
    for dir_type in [ DKEY_DCRD, DKEY_PLOT ]:
      initDict(self.dirs, [ dir_type ])
      self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)          
    ##print "self.dirs = ", self.dirs

    self.nonfake_backgrounds = [ "TT", "TTW", "TTZ", "EWK", "Rares" ]

    # Template cfg files and bookkeeping dicts for the post-analysis stages
    # (addBackgrounds, hadd stage 1.5, addFakes, datacards, plots).
    self.cfgFile_analyze_original = os.path.join(self.workingDir, cfgFile_analyze_original)
    self.cfgFile_addBackgrounds_original = os.path.join(self.workingDir, "addBackgrounds_cfg.py")
    self.cfgFile_addBackgrounds_modified = {}
    self.histogramFile_addBackgrounds = {}
    self.histogramDir_addBackgrounds = {}    
    self.process_output_addBackgrounds = {}            
    self.histogramFile_hadd_stage1_5 = os.path.join(self.outputDir, DKEY_HIST, "histograms_harvested_stage1_5_%s.root" % self.channel)
    self.cfgFile_addFakes_original = os.path.join(self.workingDir, "addBackgroundJetToTauFakes_cfg.py")
    self.cfgFile_addFakes_modified = {}
    self.histogramFile_addFakes = {}
    self.prep_dcard_processesToCopy = [ "data_obs" ] + self.nonfake_backgrounds + [ "fakes_data", "fakes_mc" ]
    self.histogramDir_prep_dcard = "1l_2tau_OS_Tight"
    self.histogramDir_prep_dcard_SS = "1l_2tau_SS_Tight"
    self.make_plots_backgrounds = self.nonfake_backgrounds + [ "fakes_data" ]
    self.cfgFile_make_plots_mcClosure_original = os.path.join(self.workingDir, "makePlots_mcClosure_cfg.py")
    self.cfgFiles_make_plots_mcClosure_modified = []

    self.select_rle_output = select_rle_output
예제 #30
0
    def create(self):
        """Generate all config files for the PU profile production and schedule
        the jobs -- either locally or on the batch system.

        Returns the updated job counters (self.num_jobs).
        """

        # Make sure every booked output directory exists before writing files.
        for entry in self.dirs.values():
            if type(entry) == dict:
                for subdir in entry.values():
                    create_if_not_exists(subdir)
            else:
                create_if_not_exists(entry)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            # Only enabled MC samples contribute to the PU profile.
            if not sample_info['use_it']:
                continue
            process_name = sample_info["process_name_specific"]
            is_mc = (sample_info["type"] == "mc")
            if not is_mc:
                continue

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable, process_name))

            inputFileList = generateInputFileList(sample_info,
                                                  self.max_files_per_job)
            key_dir = getKey(process_name)
            sample_dirs = self.dirs[key_dir]

            # Skip the sample entirely if its merged histogram already exists
            # and looks sane.
            outputFile = os.path.join(sample_dirs[DKEY_HISTO],
                                      "%s.root" % process_name)
            if os.path.isfile(outputFile) and tools_is_file_ok(
                    outputFile, min_file_size=2000):
                logging.info('File {} already exists --> skipping job'.format(
                    outputFile))
                continue

            self.outputFiles[process_name] = {
                'inputFiles': [],
                'outputFile': outputFile,
            }

            # One job per chunk of input files.
            for jobId in inputFileList.keys():
                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = inputFileList[jobId]
                if not self.inputFiles[key_file]:
                    logging.warning(
                        "ntupleFiles['%s'] = %s --> skipping job !!" %
                        (key_file, self.inputFiles[key_file]))
                    continue

                self.cfgFiles_puProfile[key_file] = os.path.join(
                    sample_dirs[DKEY_CFGS],
                    "puProfile_%s_%i_cfg.txt" % (process_name, jobId))
                self.outputFiles_tmp[key_file] = os.path.join(
                    sample_dirs[DKEY_HISTO_TMP],
                    "histogram_%i.root" % jobId)
                self.logFiles_puProfile[key_file] = os.path.join(
                    sample_dirs[DKEY_LOGS],
                    "puProfile_%s_%i.log" % (process_name, jobId))
                self.scriptFiles_puProfile[key_file] = os.path.join(
                    sample_dirs[DKEY_CFGS],
                    "puProfile_%s_%i_cfg.sh" % (process_name, jobId))
                self.jobOptions_sbatch[key_file] = {
                    'histName': process_name,
                    'inputFiles': self.inputFiles[key_file],
                    'cfgFile_path': self.cfgFiles_puProfile[key_file],
                    'outputFile': self.outputFiles_tmp[key_file],
                    'logFile': self.logFiles_puProfile[key_file],
                    'scriptFile': self.scriptFiles_puProfile[key_file],
                }
                self.createCfg_puProfile(self.jobOptions_sbatch[key_file])
                self.outputFiles[process_name]['inputFiles'].append(
                    self.outputFiles_tmp[key_file])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable)
            self.num_jobs['puProfile'] += self.createScript_sbatch(
                self.executable, self.sbatchFile_puProfile,
                self.jobOptions_sbatch)

        logging.info("Creating Makefile")
        lines_makefile = []
        # Assemble the Makefile stages in execution order.
        for add_stage in (self.addToMakefile_puProfile,
                          self.addToMakefile_hadd,
                          self.addToMakefile_plot,
                          self.addToMakefile_finalHadd):
            add_stage(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done")

        return self.num_jobs
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            #      for charge_selection in self.charge_selections:                                                          ## NO CHARGE SELECTION NEEDED HERE
            #        key_dir = getKey(process_name, charge_selection)                                                       ## NO CHARGE SELECTION NEEDED HERE
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    #            self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                    #              "_".join([ charge_selection ]), process_name)                                                    ## NO CHARGE SELECTION NEEDED HERE
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    #            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                    #              "_".join([ charge_selection ]), process_name)                                                    ## NO CHARGE SELECTION NEEDED HERE
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HIST, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_HADD_RT
            ]:  ## DKEY_PLOT TO BE ADDED LATER
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        print "self.dirs = ", self.dirs

        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue

            process_name = sample_info["process_name_specific"]

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            is_mc = (sample_info["type"] == "mc")
            lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info[
                "xsection"] * self.lumi / sample_info["nof_events"]
            apply_genWeight = sample_info["apply_genWeight"] if (
                is_mc and "apply_genWeight" in sample_info.keys()) else False
            sample_category = sample_info["sample_category"]
            triggers = sample_info["triggers"]
            apply_trigger_bits = (
                is_mc and
                (self.era == "2015" or
                 (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

            #      for charge_selection in self.charge_selections:                                        ## NO CHARGE SELECTION NEEDED HERE
            for central_or_shift in self.central_or_shifts:
                inputFileList = inputFileLists[sample_name]
                for jobId in inputFileList.keys():
                    if central_or_shift != "central" and not is_mc:
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttH"
                    ) and sample_category != "signal":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttW"
                    ) and sample_category != "TTW":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttZ"
                    ) and sample_category != "TTZ":
                        continue
                    # build config files for executing analysis code
#            key_dir = getKey(process_name, charge_selection)                                  ## NO CHARGE SELECTION NEEDED HERE
                    key_dir = getKey(process_name)
                    #            key_analyze_job = getKey(process_name, charge_selection, central_or_shift, jobId) ## NO CHARGE SELECTION NEEDED HERE
                    key_analyze_job = getKey(process_name, central_or_shift,
                                             jobId)

                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                            key_job, ntupleFiles)
                        continue
                    self.jobOptions_analyze[key_analyze_job] = {
                      'ntupleFiles' : ntupleFiles,
                    #              'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                    #                (self.channel, process_name, charge_selection, central_or_shift, jobId)),                          ## NO CHARGE SELECTION NEEDED HERE
                    #              'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                    #                (process_name, charge_selection, central_or_shift, jobId)),                                        ## NO CHARGE SELECTION NEEDED HERE
                    #              'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                    #                (self.channel, process_name, charge_selection, central_or_shift, jobId)),                          ## NO CHARGE SELECTION NEEDED HERE
                      'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % \
                        (self.channel, process_name, central_or_shift, jobId)),
                      'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%i.root" % \
                        (process_name,  central_or_shift, jobId)),
                      'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % \
                        (self.channel, process_name, central_or_shift, jobId)),
                      'sample_category' : sample_category,
                      'triggers' : sample_info["triggers"],
                    #              'charge_selection' : charge_selection,                                         ## NO CHARGE SELECTION NEEDED HERE
                    #              'jet_minPt' : self.jet_minPt,
                    #              'jet_maxPt' : self.jet_maxPt,
                    #              'jet_minAbsEta' : self.jet_minAbsEta,
                    #              'jet_maxAbsEta' : self.jet_maxAbsEta,
                    #              'hadTau_selections' : self.hadTau_selections,
                      'absEtaBins_e' : self.absEtaBins_e,
                      'absEtaBins_mu' : self.absEtaBins_mu,
                      'absPtBins_e' : self.absPtBins_e,
                      'absPtBins_mu' : self.absPtBins_mu,
                      ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
                      'use_HIP_mitigation_mediumMuonId' : True,
                      'is_mc' : is_mc,
                      'central_or_shift' : central_or_shift,
                      'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                      'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                      'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job])

                    # initialize input and output file names for hadd_stage1
                    #            key_hadd_stage1 = getKey(process_name, charge_selection)
                    key_hadd_stage1 = getKey(
                        process_name)  ## NO CHARGE SELECTION NEEDED HERE
                    if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                        self.jobOptions_analyze[key_analyze_job]
                        ['histogramFile'])
                    #            self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                    #              (self.channel, process_name, charge_selection))                                                                                 ## NO CHARGE SELECTION NEEDED HERE
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s.root" % \
                      (self.channel, process_name))

        # initialize input and output file names for hadd_stage1_5
        key_hadd_stage1_5 = getKey('')
        if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
        for key_hadd_stage1 in self.outputFile_hadd_stage1.keys():
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(
                self.outputFile_hadd_stage1[key_hadd_stage1])
        self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(
            self.dirs[DKEY_HIST], "histograms_harvested_stage1_5.root")

        ## Creating configuration files to run 'addBackgrounds_LeptonFakeRate' [stage 1.5]
        key_addBackgrounds_job = getKey('')
        self.jobOptions_addBackgrounds_LeptonFakeRate[
            key_addBackgrounds_job] = {
                'inputFile':
                self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
                'cfgFile_modified':
                os.path.join(
                    self.dirs[DKEY_CFGS],
                    os.path.basename(
                        self.cfgFile_addBackgrounds_LeptonFakeRate)),
                'outputFile':
                os.path.join(self.dirs[DKEY_HIST],
                             "addBackground_LeptonFakeRate.root"),
                'logFile':
                os.path.join(
                    self.dirs[DKEY_LOGS],
                    os.path.basename(
                        self.cfgFile_addBackgrounds_LeptonFakeRate.replace(
                            "_cfg.py", ".log"))),
            }
        self.createCfg_addBackgrounds_LeptonFakeRate(
            self.
            jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job])

        # initialize input and output file names for hadd_stage2
        #        key_hadd_stage2 = getKey(charge_selection)                                                                                            ## NO CHARGE SELECTION NEEDED HERE
        #        if not key_hadd_stage2 in self.inputFiles_hadd_stage2:                                                                                ## NO CHARGE SELECTION NEEDED HERE
        #          self.inputFiles_hadd_stage2[key_hadd_stage2] = []                                                                                   ## NO CHARGE SELECTION NEEDED HERE
        #        self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])                                     ## NO CHARGE SELECTION NEEDED HERE
        #        self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s.root" % \        ## NO CHARGE SELECTION NEEDED HERE
        #          (self.channel, charge_selection))                                                                                                   ## NO CHARGE SELECTION NEEDED HERE
        key_hadd_stage2 = getKey('')
        if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2] = []
        for key_hadd_stage1_5 in self.outputFile_hadd_stage1_5.keys():
            self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                self.outputFile_hadd_stage1_5[key_hadd_stage1_5])
        self.inputFiles_hadd_stage2[key_hadd_stage2].append(
            self.jobOptions_addBackgrounds_LeptonFakeRate[
                key_addBackgrounds_job]['outputFile'])
        self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
            self.dirs[DKEY_HIST], "histograms_harvested_stage2.root")

        if self.prep_dcard:
            processesToCopy = []
            signals = []
            logging.info(
                "Creating configuration files to run 'prepareDatacards_LeptonFakeRate'"
            )
            for process in self.prep_dcard_signals:
                signals.append(process)
            self.prep_dcard_signals = signals
            for process in self.prep_dcard_processesToCopy:
                processesToCopy.append(process)
            self.prep_dcard_processesToCopy = processesToCopy
            for histogramToFit in self.histograms_to_fit:
                key_prep_dcard_job = getKey(histogramToFit)
                self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "prepareDatacards_LeptonFakeRate_%s_cfg.py" %
                        (histogramToFit)),
                    'datacardFile':
                    os.path.join(self.dirs[DKEY_DCRD],
                                 "prepareDatacards_%s.root" %
                                 (histogramToFit)),
                    'histogramDir': (self.histogramDir_prep_dcard),
                    'histogramToFit':
                    histogramToFit,
                    'label':
                    None
                }
                #        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])       ## DEF LINE
                self.createCfg_prep_dcard_LeptonFakeRate(
                    self.jobOptions_prep_dcard[key_prep_dcard_job])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            #      self.createScript_sbatch()

            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)
            self.sbatchFile_addBackgrounds_LeptonFakeRate = os.path.join(
                self.dirs[DKEY_SCRIPTS],
                "sbatch_addBackgrounds_LeptonFakeRate_%s.py" % self.channel)
            self.createScript_sbatch(
                self.executable_addBackgrounds_LeptonFakeRate,
                self.sbatchFile_addBackgrounds_LeptonFakeRate,
                self.jobOptions_addBackgrounds_LeptonFakeRate)


#      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_comp_jetToTauFakeRate)
#      self.sbatchFile_comp_jetToTauFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
#      self.createScript_sbatch(self.executable_comp_jetToTauFakeRate, self.sbatchFile_comp_jetToTauFakeRate, self.jobOptions_comp_jetToTauFakeRate)

#### FAKE RATE COMP BLOCK COMMENTED OUT ########################

#    logging.info("Creating configuration files for executing 'comp_jetToTauFakeRate'")
#    for charge_selection in self.charge_selections:
#      key_comp_jetToTauFakeRate_job = getKey(charge_selection)
#      key_hadd_stage2 = getKey(charge_selection)
#      self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job] = {
#        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#        'cfgFile_modified' : os.path.join(
#          self.dirs[DKEY_CFGS], "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection),
#        'outputFile' : os.path.join(
#          self.dirs[DKEY_HIST], "comp_jetToTauFakeRate_%s.root" % charge_selection),
#        'looseRegion' : "jetToTauFakeRate_%s/denominator/" % charge_selection,
#        'tightRegion' : "jetToTauFakeRate_%s/numerator/" % charge_selection,
#        'absEtaBins' : self.absEtaBins,
#        'ptBins' : self.ptBins
#      }
#      self.createCfg_comp_jetToTauFakeRate(self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job])
#      self.targets.append(self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job]['outputFile'])

#    logging.info("Creating configuration files to run 'makePlots'")
#    for charge_selection in self.charge_selections:
#      key_makePlots_job = getKey(charge_selection)
#      key_hadd_stage2 = getKey(charge_selection)
#      self.jobOptions_make_plots[key_makePlots_job] = {
#        'executable' : self.executable_make_plots,
#        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#        'cfgFile_modified' : os.path.join(
#          self.dirs[DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
#        'outputFile' : os.path.join(
#          self.dirs[DKEY_PLOT], "makePlots_%s.png" % self.channel),
#        'histogramDir' : "jetToTauFakeRate_%s" % charge_selection,
#        'label' : None,
#        'make_plots_backgrounds' : [ "TT", "TTW", "TTZ", "EWK", "Rares" ],
#      }
#      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
#      self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
#      for absEtaBin in [ "absEtaLt1_5", "absEta1_5to9_9" ]:
#        key_makePlots_job = getKey(charge_selection, absEtaBin, "denominator")
#        key_hadd_stage2 = getKey(charge_selection)
#        self.jobOptions_make_plots[key_makePlots_job] = {
#          'executable' : self.executable_make_plots,
#          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#          'cfgFile_modified' : os.path.join(
#            self.dirs[DKEY_CFGS], "makePlots_%s_%s_denominator_%s_cfg.py" % (self.channel, charge_selection, absEtaBin)),
#          'outputFile' : os.path.join(
#            self.dirs[DKEY_PLOT], "makePlots_%s_%s_denominator_%s.png" % (self.channel, charge_selection, absEtaBin)),
#          'histogramDir' : "jetToTauFakeRate_%s/denominator/%s" % (charge_selection, absEtaBin),
#          'label' : None,
#          'make_plots_backgrounds' : [ "TT", "TTW", "TTZ", "EWK", "Rares" ],
#        }
#        self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
#        for hadTau_selection in self.hadTau_selections:
#          key_makePlots_job = getKey(charge_selection, absEtaBin, "numerator", hadTau_selection)
#          key_hadd_stage2 = getKey(charge_selection)
#          self.jobOptions_make_plots[key_makePlots_job] = {
#            'executable' : self.executable_make_plots,
#            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#            'cfgFile_modified' : os.path.join(
#              self.dirs[DKEY_CFGS], "makePlots_%s_%s_numerator_%s_%s_cfg.py" % (self.channel, charge_selection, hadTau_selection, absEtaBin)),
#            'outputFile' : os.path.join(
#              self.dirs[DKEY_PLOT], "makePlots_%s_%s_numerator_%s_%s.png" % (self.channel, charge_selection, hadTau_selection, absEtaBin)),
#            'histogramDir' : "jetToTauFakeRate_%s/numerator/%s/%s" % (charge_selection, hadTau_selection, absEtaBin),
#            'label' : None,
#            'make_plots_backgrounds' : [ "TT", "TTW", "TTZ", "EWK", "Rares" ],
#          }
#          self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
#########################################################

        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        #    self.addToMakefile_hadd_stage1_5(lines_makefile)
        self.addToMakefile_backgrounds_from_data(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.addToMakefile_prep_dcard(lines_makefile)
        #    self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)   ## TO BE IMPLEMENTED LATER
        #    self.addToMakefile_make_plots(lines_makefile)              ## TO BE IMPLEMENTED LATER
        self.targets = [
            outputFile for outputFile in self.outputFile_hadd_stage2.values()
        ]
        self.createMakefile(lines_makefile)

        logging.info("Done")
예제 #32
0
    def __init__(self,
                 configDir,
                 outputDir,
                 executable_prodNtuple,
                 cfgFile_prodNtuple,
                 samples,
                 max_files_per_job,
                 era,
                 preselection_cuts,
                 leptonSelection,
                 hadTauSelection,
                 debug,
                 running_method,
                 version,
                 num_parallel_jobs,
                 pool_id=''):
        """Initializes bookkeeping for Ntuple-production jobs.

        Args:
          configDir: directory under which cfg files, logs and the Makefile are written
          outputDir: directory under which the produced Ntuples are stored
          executable_prodNtuple: name of the Ntuple-production executable
          cfgFile_prodNtuple: template cfg file, resolved relative to the current working directory
          samples: dictionary of sample definitions, keyed by sample name
          max_files_per_job: maximum number of input files processed per job
          era: data-taking era
          preselection_cuts: event preselection cuts applied during production
          leptonSelection: lepton selection criteria
          hadTauSelection: hadronic tau selection criteria
          debug: enable debug output
          running_method: either "sbatch" (batch system) or "makefile" (local)
          version: production version tag
          num_parallel_jobs: number of jobs run in parallel in Makefile mode
          pool_id: sbatch pool identifier; a random UUID is generated if empty
        """
        self.configDir = configDir
        self.outputDir = outputDir
        self.executable_prodNtuple = executable_prodNtuple
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.preselection_cuts = preselection_cuts
        self.leptonSelection = leptonSelection
        self.hadTauSelection = hadTauSelection
        self.debug = debug
        # NOTE(review): assert is stripped under `python -O`; kept (rather than
        # raising ValueError) so the exception type callers see is unchanged
        assert (running_method.lower()
                in ["sbatch",
                    "makefile"]), "Invalid running method: %s" % running_method
        self.running_method = running_method
        # exactly one of the two flags is set, depending on the running method
        self.is_sbatch = self.running_method.lower() == "sbatch"
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(self.configDir, "Makefile_prodNtuple")
        self.num_parallel_jobs = num_parallel_jobs
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        # use logging (as the rest of the module does) instead of a bare print
        logging.info("Working directory is: %s" % self.workingDir)

        self.version = version

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.outputDir)
        # log files are opened here and stay open for the object's lifetime
        self.stdout_file = codecs.open(
            os.path.join(self.configDir, "stdout_prodNtuple.log"), 'w',
            'utf-8')
        self.stderr_file = codecs.open(
            os.path.join(self.configDir, "stderr_prodNtuple.log"), 'w',
            'utf-8')
        self.dirs = {}
        self.cfgFile_prodNtuple_original = os.path.join(
            self.workingDir, cfgFile_prodNtuple)
        self.cfgFiles_prodNtuple_modified = {}
        self.logFiles_prodNtuple = {}
        self.sbatchFile_prodNtuple = os.path.join(self.configDir,
                                                  "sbatch_prodNtuple.py")
        self.inputFiles = {}
        self.outputFiles = {}
        self.filesToClean = []

        # per-sample directories: cfgs and logs under configDir, Ntuples under outputDir
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, process_name)
        # top-level cfg/log directories always live under configDir
        # (the old else-branch was unreachable: the loop only yields these two keys)
        for dir_type in [DKEY_CFGS, DKEY_LOGS]:
            initDict(self.dirs, [dir_type])
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type)

        self.cvmfs_error_log = {}
예제 #33
0
    def __init__(
        self,
        configDir,
        outputDir,
        cfgFile_prodNtuple,
        samples,
        max_files_per_job,
        era,
        preselection_cuts,
        leptonSelection,
        hadTauWP,
        check_output_files,
        running_method,
        version,
        num_parallel_jobs,
        pileup,
        golden_json,
        dry_run,
        isDebug,
        gen_matching_by_index,
        use_nonnominal,
        use_home,
        skip_tools_step,
        verbose=False,
        pool_id='',
    ):
        """Initializes bookkeeping for post-processing Ntuple-production jobs.

        Args:
          configDir: directory for cfg files, scripts, logs and the Makefile
          outputDir: directory for the produced Ntuples
          cfgFile_prodNtuple: template cfg file, resolved relative to the templates directory
          samples: dictionary of sample definitions, keyed by sample name
          max_files_per_job: maximum number of input files processed per job
          era: data-taking era
          preselection_cuts: event preselection cuts applied during production
          leptonSelection: lepton selection criteria
          hadTauWP: hadronic tau ID working point
          check_output_files: whether to verify the presence of output files
          running_method: either "sbatch" (batch system) or "makefile" (local)
          version: production version tag
          num_parallel_jobs: number of jobs run in parallel in Makefile mode
          pileup: path to the pileup-histogram file (must exist)
          golden_json: path to the golden JSON file (must exist)
          dry_run: configure jobs without submitting them
          isDebug: enable debug output in the generated cfg files
          gen_matching_by_index: gen-matching strategy flag
          use_nonnominal: flag forwarded to the production cfg
          use_home: flag forwarded to the production cfg
          skip_tools_step: skip the tools post-processing step
          verbose: enable verbose output
          pool_id: sbatch pool identifier; a random UUID is generated if empty

        Raises:
          ValueError: if running_method is invalid, or pileup / golden_json
            do not point to existing files
        """
        self.configDir = configDir
        self.outputDir = outputDir
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.preselection_cuts = preselection_cuts
        self.leptonSelection = leptonSelection
        self.hadTauWP = hadTauWP
        self.check_output_files = check_output_files
        self.verbose = verbose
        self.dry_run = dry_run
        self.isDebug = isDebug
        self.gen_matching_by_index = gen_matching_by_index
        self.use_nonnominal = use_nonnominal
        self.use_home = use_home
        self.pileup = pileup
        self.golden_json = golden_json
        if running_method.lower() not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)

        # fail fast on missing auxiliary inputs
        if not os.path.isfile(self.pileup):
            raise ValueError('No such file: %s' % self.pileup)
        self.pileup_histograms = get_pileup_histograms(self.pileup)

        if not os.path.isfile(self.golden_json):
            raise ValueError('No such file: %s' % self.golden_json)

        self.running_method = running_method
        self.is_sbatch = self.running_method.lower() == "sbatch"
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(self.configDir, "Makefile_prodNtuple")
        self.num_parallel_jobs = num_parallel_jobs
        self.skip_tools_step = skip_tools_step
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: %s" % self.workingDir)
        self.template_dir = os.path.join(os.getenv('CMSSW_BASE'), 'src',
                                         'tthAnalysis', 'HiggsToTauTau',
                                         'test', 'templates')
        logging.info("Templates directory is: %s" % self.template_dir)

        self.version = version

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.outputDir)
        self.stdout_file_path = os.path.join(self.configDir,
                                             "stdout_prodNtuple.log")
        self.stderr_file_path = os.path.join(self.configDir,
                                             "stderr_prodNtuple.log")
        self.sw_ver_file_cfg = os.path.join(self.configDir,
                                            "VERSION_prodNtuple.log")
        self.sw_ver_file_out = os.path.join(self.outputDir,
                                            "VERSION_prodNtuple.log")
        # avoid clobbering logs from earlier runs by versioning the file names
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out = get_log_version(
            (self.stdout_file_path, self.stderr_file_path,
             self.sw_ver_file_cfg, self.sw_ver_file_out))

        self.cfgFile_prodNtuple_original = os.path.join(
            self.template_dir, cfgFile_prodNtuple)
        self.sbatchFile_prodNtuple = os.path.join(self.configDir,
                                                  "sbatch_prodNtuple.py")
        self.cfgFiles_prodNtuple_modified = {}
        self.logFiles_prodNtuple = {}

        self.inputFiles = {}
        self.outputFiles = {}
        self.filesToClean = []
        self.dirs = {}
        # per-sample directories: cfgs and logs under configDir, Ntuples under outputDir
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, process_name)
        # top-level cfg/log directories always live under configDir
        # (the old else-branch was unreachable: the loop only yields these two keys)
        for dir_type in [DKEY_CFGS, DKEY_LOGS]:
            initDict(self.dirs, [dir_type])
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type)

        self.cvmfs_error_log = {}
        self.executable = "produceNtuple.sh"
예제 #34
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

        Returns:
          self.num_jobs, the number of jobs booked for submission
        """
        # per-sample directories: cfgs/logs under configDir, the rest under outputDir
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [
                    DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC
            ]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        # channel-level directories
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        # number of analysis jobs each input Ntuple file is split into, per sample;
        # jobs within one file are distinguished via maxSelEvents/skipSelEvents
        numJobsPerFile_map = {
            "signal_ggf_nonresonant_node_sm_hh_2b2v": 500,
            "signal_ggf_nonresonant_cHHH1_hh_2b2v": 100,
            "TTJets_DiLept": 50,
            "TTJets_DiLept_ext1": 50,
            "TTTo2L2Nu": 10,
        }

        for apply_jetSmearing in self.apply_jetSmearing_options:
            jetSmearingLabel = "jetSmearingEnabled" if apply_jetSmearing else "jetSmearingDisabled"
            for apply_metSmearing in self.apply_metSmearing_options:
                metSmearingLabel = "metSmearingEnabled" if apply_metSmearing else "metSmearingDisabled"
                for sample_name, sample_info in self.samples.items():
                    if not sample_info["use_it"]:
                        continue
                    process_name = sample_info["process_name_specific"]
                    logging.info(
                        "Creating configuration files to run '%s' for sample %s"
                        % (self.executable_analyze, process_name))

                    inputFileList = inputFileLists[sample_name]
                    if process_name not in numJobsPerFile_map:
                        raise ValueError("Invalid sample: %s" % process_name)
                    numJobsPerFile = numJobsPerFile_map[process_name]
                    numJobs = numJobsPerFile * len(inputFileList.keys())
                    for jobId in range(1, numJobs + 1):

                        # map the 1-based jobId back onto its input Ntuple and
                        # event range; `//` is explicit floor division
                        # (identical to Python 2 `/` on ints, and py3-safe)
                        ntupleId = ((jobId - 1) // numJobsPerFile) + 1
                        maxSelEvents = 500
                        skipSelEvents = maxSelEvents * (
                            (jobId - 1) % numJobsPerFile)

                        # build config files for executing analysis code
                        key_dir = getKey(process_name)
                        key_analyze_job = getKey(process_name,
                                                 jetSmearingLabel,
                                                 metSmearingLabel, jobId)
                        ntupleFiles = inputFileList[ntupleId]
                        if len(ntupleFiles) == 0:
                            logging.warning(
                                "No input ntuples for %s --> skipping job !!" %
                                (key_analyze_job))
                            continue

                        cfgFile_modified_path = os.path.join(
                            self.dirs[key_dir][DKEY_CFGS],
                            "analyze_%s_%s_%s_%s_%i_cfg.py" %
                            (self.channel, process_name, jetSmearingLabel,
                             metSmearingLabel, jobId))
                        histogramFile_path = os.path.join(
                            self.dirs[key_dir][DKEY_HIST],
                            "analyze_%s_%s_%s_%s_%i.root" %
                            (self.channel, process_name, jetSmearingLabel,
                             metSmearingLabel, jobId))
                        logFile_path = os.path.join(
                            self.dirs[key_dir][DKEY_LOGS],
                            "analyze_%s_%s_%s_%s_%i.log" %
                            (self.channel, process_name, jetSmearingLabel,
                             metSmearingLabel, jobId))
                        rleOutputFile_path = os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % (self.channel, process_name, jetSmearingLabel, metSmearingLabel, jobId)) \
                                             if self.select_rle_output else ""
                        self.jobOptions_analyze[key_analyze_job] = {
                            'ntupleFiles': ntupleFiles,
                            'cfgFile_modified': cfgFile_modified_path,
                            'histogramFile': histogramFile_path,
                            'logFile': logFile_path,
                            'selEventsFileName_output': rleOutputFile_path,
                            'apply_jetSmearing': apply_jetSmearing,
                            'apply_metSmearing': apply_metSmearing,
                            'maxSelEvents': maxSelEvents,
                            'skipSelEvents': skipSelEvents
                        }
                        self.createCfg_analyze(
                            self.jobOptions_analyze[key_analyze_job],
                            sample_info)

                        # initialize input and output file names for hadd_stage1
                        key_hadd_stage1 = getKey(process_name,
                                                 jetSmearingLabel,
                                                 metSmearingLabel)
                        if key_hadd_stage1 not in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                        self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                            self.jobOptions_analyze[key_analyze_job]
                            ['histogramFile'])
                        self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s_%s.root" % \
                          (self.channel, process_name, jetSmearingLabel, metSmearingLabel))

                    # add output files of hadd_stage1 to list of input files for hadd_stage2
                    key_hadd_stage1 = getKey(process_name, jetSmearingLabel,
                                             metSmearingLabel)
                    key_hadd_stage2 = getKey("")
                    if key_hadd_stage2 not in self.inputFiles_hadd_stage2:
                        self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                    self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                        self.outputFile_hadd_stage1[key_hadd_stage1])
                    self.outputFile_hadd_stage2[
                        key_hadd_stage2] = os.path.join(
                            self.dirs[DKEY_HIST],
                            "histograms_harvested_stage2_%s.root" %
                            self.channel)

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.targets.extend(self.outputFile_hadd_stage2.values())
        self.createMakefile(lines_makefile)

        logging.info("Done")

        return self.num_jobs
예제 #35
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """
  
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_selection in self.lepton_selections:
        #lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
        key_dir = getKey(process_name, lepton_selection)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
          initDict(self.dirs, [ key_dir, dir_type ])
          if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
            self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
              "_".join([ lepton_selection ]), process_name)
          else:
            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
              "_".join([ lepton_selection ]), process_name)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    ##print "self.dirs = ", self.dirs    

    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
      else:
        create_if_not_exists(self.dirs[key])

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)

    for lepton_selection in self.lepton_selections:
        for sample_name, sample_info in self.samples.items():
          if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
          process_name = sample_info["process_name_specific"]

          # Edit Siddh ~~~~~~~~~~~~~
          run_process = False 
          for sprocess_run in sample_process_run_s:    
            if sprocess_run == process_name:  
              run_process = True  
          #print "Run process: ", sprocess_run     
              
          if run_process == False:   
            continue       
          #print "run_process:",process_name    
          # ~~~~~~~~~~~~~~~~~~~~~~~~~~

          #if not ("DY" in process_name or "Muon" in process_name): continue
          logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))  
          sample_category = sample_info["sample_category"]
          is_mc = (sample_info["type"] == "mc")
          is_signal = (sample_category == "signal")
          inputFileList = inputFileLists[sample_name]
          for central_or_shift in self.central_or_shifts:
            for jobId in inputFileList.keys():
              #if central_or_shift != "central" and not (lepton_and_hadTau_selection.startswith("Tight") and lepton_charge_selection == "SS"):
              #  continue
              if central_or_shift != "central" and not is_mc:
                continue                

              # build config files for executing analysis code
              key_dir = getKey(process_name, lepton_selection)
              key_analyze_job = getKey(process_name, lepton_selection, central_or_shift, jobId)
              ntupleFiles = inputFileList[jobId]
              if len(ntupleFiles) == 0:
                print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_job, ntupleFiles)
                continue
              self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles' : ntupleFiles,
                'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                  (self.channel, process_name, lepton_selection, central_or_shift, jobId)),
                'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                  (process_name, lepton_selection, central_or_shift, jobId)),
                'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                  (self.channel, process_name, lepton_selection, central_or_shift, jobId)),
                'rleOutputFile' : os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % \
                  (self.channel, process_name, lepton_selection, central_or_shift, jobId)) if self.select_rle_output else "",
                'sample_category' : sample_category,
                'triggers' : sample_info["triggers"],
                'lepton_selection' : lepton_selection,
                #'apply_leptonGenMatching' : self.apply_leptonGenMatching,
                #'apply_hadTauGenMatching' : self.apply_hadTauGenMatching,
                #'applyFakeRateWeights' : self.applyFakeRateWeights if not (lepton_selection == "Tight" and hadTau_selection.find("Tight") != -1) else "disabled",
                'applyFakeRateWeights' : "disabled",
                'use_HIP_mitigation_mediumMuonId' : True,
                'is_mc' : is_mc,
                'central_or_shift' : central_or_shift,
                'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc
              }

              #applyFakeRateWeights = self.applyFakeRateWeights
              #if lepton_and_hadTau_frWeight == "disabled":
              #  applyFakeRateWeights = "disabled"
              self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

              # initialize input and output file names for hadd_stage1
              key_hadd_stage1 = getKey(process_name, lepton_selection)
              if not key_hadd_stage1 in self.inputFiles_hadd_stage1.keys():
                self.inputFiles_hadd_stage1[key_hadd_stage1] = []
              self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
              self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                  (self.channel, process_name, lepton_selection))
          print key_hadd_stage1, self.channel, process_name, lepton_selection, self.outputFile_hadd_stage1[key_hadd_stage1]

          #key_hadd_stage1 = getKey(process_name, lepton_selection)
          #key_hadd_stage1_5 = getKey(lepton_selection)
          #print self.inputFiles_hadd_stage1_5 
          #self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.outputFile_hadd_stage1[key_hadd_stage1])

          key_addBackgrounds_job = getKey(lepton_selection)
          sample_categories = []
          sample_categories.extend([ "signal" ])
          processes_input = []


          # initialize input and output file names for hadd_stage2
          key_hadd_stage2 = getKey(lepton_selection)
          if not key_hadd_stage2 in self.inputFiles_hadd_stage2.keys():
            self.inputFiles_hadd_stage2[key_hadd_stage2] = []
          #if lepton_selection == "Tight":
          #  self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
          #key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight)
          self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])
        self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s.root" % \
            (self.channel, lepton_selection))

    key_hadd_stage2 = getKey(lepton_selection)
    #self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.jobOptions_addFlips[key_addFlips_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    processesToCopy = []
    for process in self.prep_dcard_processesToCopy:
      processesToCopy.append(process)
    self.prep_dcard_processesToCopy = processesToCopy
    processesToCopy = []
    for process in self.prep_dcard_signals:
      processesToCopy.append(process)
    self.prep_dcard_signals = processesToCopy
    for histogramToFit in self.histograms_to_fit:
      key_prep_dcard_job = getKey(histogramToFit)
      key_hadd_stage2 = getKey(lepton_selection)
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
        'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % (self.channel, histogramToFit)),
        'datacardFile' : os.path.join(self.dirs[DKEY_DCRD], "prepareDatacards_%s_%s.root" % (self.channel, histogramToFit)),
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }                            
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    #self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    #self.addToMakefile_add_syst_dcard(lines_makefile)
    #self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)
  
    logging.info("Done")
  def create(self):
    """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

    Sets up the directory structure, builds one analysis config per
    (sample, mode, systematic shift, input-file chunk), wires the resulting
    histogram files into the hadd_stage1 / hadd_stage2 merging steps, and
    finally writes the sbatch submission script (if requested) and the
    Makefile that drives all jobs.  Returns the number of jobs
    (self.num_jobs).
    """

    # Per-(process, mode) directories: configs/logs under configDir,
    # histograms / run-lumi-event lists / sync output under outputDir.
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      process_name = sample_info["process_name_specific"]
      for mode in self.modes:
        key_dir = getKey(process_name, mode)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
          # sync directories are only needed when sync output is requested
          if dir_type == DKEY_SYNC and not self.do_sync:
            continue
          initDict(self.dirs, [ key_dir, dir_type ])
          # "_".join([ mode ]) is just 'mode' -- presumably kept for symmetry
          # with channels that join several selection strings; TODO confirm
          if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
            self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, "_".join([ mode ]), process_name)
          else:
            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ mode ]), process_name)
    # Channel-level directories (scripts, datacards, plots, hadd runtime files, ...).
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    ##print "self.dirs = ", self.dirs

    # Materialize the whole directory tree on disk.
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
      else:
        create_if_not_exists(self.dirs[key])

    # Split the input Ntuples of every usable sample into chunks of at most
    # max_files_per_job files; one analysis job is created per chunk below.
    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for mode in self.modes:
      for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
          continue
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

        sample_category = sample_info["sample_category"]
        is_mc = (sample_info["type"] == "mc")

        for central_or_shift in self.central_or_shifts:

          inputFileList = inputFileLists[sample_name]
          for jobId in inputFileList.keys():

            # build config files for executing analysis code
            key_dir = getKey(process_name, mode)
            key_analyze_job = getKey(process_name, mode, central_or_shift, jobId)
            ntupleFiles = inputFileList[jobId]
            if len(ntupleFiles) == 0:
              logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
              continue

            self.jobOptions_analyze[key_analyze_job] = {
              'ntupleFiles' : ntupleFiles,
              'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                (self.channel, process_name, mode, central_or_shift, jobId)),
              'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                (process_name, mode, central_or_shift, jobId)),
              'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                (self.channel, process_name, mode, central_or_shift, jobId)),
              'sample_category' : sample_category,
              'mode' : mode,
              'lepton_selection' : self.lepton_selection,
              'hadTau_selection' : self.hadTau_selection,
              'SVfit4tau_logM_wMassConstraint_MarkovChain' : self.SVfit4tau_logM_wMassConstraint_MarkovChain,
              'SVfit4tau_logM_woMassConstraint_MarkovChain' : self.SVfit4tau_logM_woMassConstraint_MarkovChain,
              'SVfit4tau_logM_wMassConstraint_VAMP' : self.SVfit4tau_logM_wMassConstraint_VAMP,
              'use_HIP_mitigation_mediumMuonId' : False,
              'is_mc' : is_mc,
              'central_or_shift' : central_or_shift,
              # NOTE(review): lumi_scale is fixed to 1. here, unlike sibling
              # configs that scale MC by xsection*lumi/nof_events -- confirm
              # that scaling happens downstream for this workflow
              'lumi_scale' : 1.,
              'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
            }
            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

            # initialize input and output file names for hadd_stage1
            # (one merged histogram file per (process, mode) pair)
            key_hadd_stage1 = getKey(process_name, mode)
            if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
              self.inputFiles_hadd_stage1[key_hadd_stage1] = []
            self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
            self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
              (self.channel, process_name, mode))

            # initialize input and output file names for hadd_stage2;
            # getKey() with no arguments yields the single channel-wide key
            key_hadd_stage2 = getKey()
            if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
              self.inputFiles_hadd_stage2[key_hadd_stage2] = []
            self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])
            self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % \
              (self.channel))

    # When running via SLURM, write the submission script covering all jobs.
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    # The Makefile encodes the dependency chain: analyze -> hadd_stage1 -> hadd_stage2.
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
예제 #37
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workflow -- either locally or on the batch system.

        For every usable sample this builds one analysis job per chunk of
        input Ntuple files, registers the produced histogram file as input
        to the per-sample hadd_stage1 merging step, and finally writes the
        sbatch submission script (when running on the batch system) and the
        Makefile that drives all jobs.

        Returns:
            The number of jobs (self.num_jobs).
        """

        # Per-sample directory layout: configs/logs under configDir,
        # histograms and run-lumi-event lists under outputDir.
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        # Channel-level directories (scripts, datacards, plots, hadd runtime files).
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)

        # Materialize the whole directory tree on disk.
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        # Split the input Ntuples of every usable sample into chunks of at
        # most max_files_per_job files; one analysis job is created per chunk.
        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")

            inputFileList = inputFileLists[sample_name]
            for jobId in inputFileList.keys():

                # build config files for executing analysis code
                key_dir = getKey(process_name)
                key_analyze_job = getKey(process_name, jobId)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                    # BUGFIX: this message previously referenced the undefined
                    # name 'key_file', raising a NameError whenever an empty
                    # chunk was encountered; use the job key instead and log
                    # via logging.warning for consistency with the rest of
                    # the workflow code.
                    logging.warning(
                        "ntupleFiles['%s'] = %s --> skipping job !!" %
                        (key_analyze_job, ntupleFiles))
                    continue

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles' : ntupleFiles,
                  'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%i_cfg.py" % \
                     (self.channel, process_name, jobId)),
                  'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%i.root" % \
                     (process_name, jobId)),
                  'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%i.log" % \
                     (self.channel, process_name, jobId)),
                  'sample_category' : sample_category,
                  'hadTau_selection' : self.hadTau_selection,
                  'use_HIP_mitigation_mediumMuonId' : True,
                  'is_mc' : is_mc,
                  # scale MC to the target luminosity; data is left unscaled
                  'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                  'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                  'selectBDT' : True,
                  'changeBranchNames' : self.changeBranchNames
                }
                self.createCfg_analyze(
                    self.jobOptions_analyze[key_analyze_job])

                # initialize input and output file names for hadd_stage1
                # (one merged histogram file per process)
                key_hadd_stage1 = getKey(process_name)
                if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                    self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                    self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s.root" % \
                  (self.channel, process_name))
                self.targets.append(
                    self.outputFile_hadd_stage1[key_hadd_stage1])

        # When running via SLURM, write the submission script covering all jobs.
        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)

        # The Makefile encodes the dependency chain: analyze -> hadd_stage1.
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")

        return self.num_jobs
예제 #38
0
    def __init__(self,
                 treeName,
                 outputDir,
                 cfgDir,
                 executable_addMEM,
                 samples,
                 era,
                 debug,
                 running_method,
                 max_files_per_job,
                 mem_integrations_per_job,
                 max_mem_integrations,
                 num_parallel_jobs,
                 leptonSelection,
                 hadTauSelection,
                 isForBDTtraining,
                 channel,
                 pool_id=''):
        """Configures the addMEM Ntuple-production workflow.

        Stores the job-splitting and selection parameters, validates the
        running method ('sbatch' or 'makefile'), opens per-channel
        stdout/stderr log files under cfgDir, and registers the per-sample
        directory layout (Ntuples under outputDir, configs/logs/hadd files
        under cfgDir).  Samples flagged as unused, or categorized as
        'additional_signal_overlap' / 'background_data_estimate', are skipped.

        Raises:
            ValueError: if running_method is neither 'sbatch' nor 'makefile'.
        """

        self.treeName = treeName
        self.outputDir = outputDir
        self.cfgDir = cfgDir
        self.executable_addMEM = executable_addMEM
        self.mem_integrations_per_job = mem_integrations_per_job
        self.max_files_per_job = max_files_per_job
        self.max_mem_integrations = max_mem_integrations
        self.samples = samples
        self.era = era
        self.debug = debug
        self.channel = channel
        self.leptonSelection = leptonSelection
        self.hadTauSelection = hadTauSelection
        # hadTauSelection is expected in the form "<definition>|<workingPoint>";
        # a missing '|' would raise IndexError here
        self.hadTauDefinition = self.hadTauSelection.split('|')[0]
        self.hadTauWorkingPoint = self.hadTauSelection.split('|')[1]
        self.maxPermutations_branchName = "maxPermutations_addMEM_%s_lep%s_tau%s_%s" % (
            self.channel,
            self.leptonSelection,
            self.hadTauDefinition,
            self.hadTauWorkingPoint,
        )
        self.isForBDTtraining = isForBDTtraining
        if running_method.lower() not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)
        self.running_method = running_method
        self.is_sbatch = False
        self.is_makefile = False
        if self.running_method.lower() == "sbatch":
            self.is_sbatch = True
        else:
            self.is_makefile = True
        self.makefile = os.path.join(self.cfgDir, "Makefile_%s" % self.channel)
        self.num_parallel_jobs = num_parallel_jobs
        # generate a unique pool id unless the caller supplied one
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: {workingDir}".format(
            workingDir=self.workingDir))

        for dirPath in [self.outputDir, self.cfgDir]:
            create_if_not_exists(dirPath)
        self.stdout_file = codecs.open(
            os.path.join(self.cfgDir, "stdout_%s.log" % self.channel), 'w',
            'utf-8')
        self.stderr_file = codecs.open(
            os.path.join(self.cfgDir, "stderr_%s.log" % self.channel), 'w',
            'utf-8')
        # Per-job bookkeeping, filled in by create()
        # (note: a redundant duplicate 'self.samples = samples' assignment
        # was removed here; 'samples' is already stored above).
        self.dirs = {}
        self.cfgFiles_addMEM_modified = {}
        self.shFiles_addMEM_modified = {}
        self.logFiles_addMEM = {}
        self.sbatchFile_addMEM = os.path.join(
            self.cfgDir, "sbatch_addMEM_%s.py" % self.channel)
        self.inputFiles = {}
        self.outputFiles = {}
        self.hadd_records = {}
        self.filesToClean = []

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or \
                sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [DKEY_NTUPLES, DKEY_FINAL_NTUPLES]:
                initDict(self.dirs, [key_dir, dir_type])
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, process_name)
            for dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_HADD, DKEY_HADD_RT]:
                initDict(self.dirs, [key_dir, dir_type])
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.cfgDir, dir_type, self.channel, process_name)

        # maps hostnames to CVMFS failure records, filled during job monitoring
        self.cvmfs_error_log = {}
예제 #39
0
  def __init__(self, outputDir, executable_analyze, cfgFile_analyze_original, samples, lepton_charge_selections, hadTau_selection, applyFakeRateWeights, central_or_shifts,
               max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
               executable_addBackgrounds, executable_addFakes, executable_addFlips, histograms_to_fit, select_rle_output = False, executable_prep_dcard="prepareDatacard"):
    """Configures the 2lss_1tau analysis workflow.

    Extends the generic analyzeConfig with the lepton/tau selections,
    gen-matching categories, fake-rate-weight scheme ('3L' or '2lepton'),
    era-specific sample re-labeling for data-driven background estimates,
    and the file locations for the addBackgrounds / addFakes / addFlips /
    prepareDatacards / makePlots steps.

    Raises:
      ValueError: if applyFakeRateWeights is neither '3L' nor '2lepton',
        or (in the '2lepton' scheme) if era is neither '2015' nor '2016'.
    """
    analyzeConfig.__init__(self, outputDir, executable_analyze, "2lss_1tau", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
      histograms_to_fit)

    self.samples = samples

    # Selection categories and whether the fake-rate weights are applied;
    # 'enabled' only makes sense together with the Fakeable selections.
    self.lepton_and_hadTau_selections = [ "Tight", "Fakeable", "Fakeable_mcClosure" ]
    self.lepton_and_hadTau_frWeights = [ "enabled", "disabled" ]
    self.lepton_charge_selections = lepton_charge_selections
    self.hadTau_selection_part2 = hadTau_selection
    self.applyFakeRateWeights = applyFakeRateWeights
        
    # Gen-matching categories; a trailing '0j' means no jet was matched,
    # so combinations where both strings end in '0j' are treated as
    # non-fake, everything else as fake.
    self.lepton_genMatches = [ "2l0j", "1l1j", "0l2j" ]
    self.hadTau_genMatches = [ "1t0e0m0j", "0t1e0m0j", "0t0e1m0j", "0t0e0m1j" ]

    self.apply_leptonGenMatching = None
    self.apply_hadTauGenMatching = None
    self.lepton_and_hadTau_genMatches_nonfakes = []
    self.lepton_and_hadTau_genMatches_fakes = []
    if self.applyFakeRateWeights == "3L":
      # '3L' scheme: gen-match both the leptons and the hadronic tau
      self.apply_leptonGenMatching = True
      self.apply_hadTauGenMatching = True
      for lepton_genMatch in self.lepton_genMatches:
        for hadTau_genMatch in self.hadTau_genMatches:
          lepton_and_hadTau_genMatch = "&".join([ lepton_genMatch, hadTau_genMatch ])
          if lepton_genMatch.endswith("0j") and hadTau_genMatch.endswith("0j"):
            self.lepton_and_hadTau_genMatches_nonfakes.append(lepton_and_hadTau_genMatch)
          else:
            self.lepton_and_hadTau_genMatches_fakes.append(lepton_and_hadTau_genMatch)
    elif applyFakeRateWeights == "2lepton":
      # '2lepton' scheme: gen-match the leptons only
      self.apply_leptonGenMatching = True
      self.apply_hadTauGenMatching = False
      for lepton_genMatch in self.lepton_genMatches:
        if lepton_genMatch.endswith("0j"):
          self.lepton_and_hadTau_genMatches_nonfakes.append(lepton_genMatch)
        else:
          self.lepton_and_hadTau_genMatches_fakes.append(lepton_genMatch)
    else:
      raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % applyFakeRateWeights)

    self.executable_addBackgrounds = executable_addBackgrounds
    self.executable_addFakes = executable_addFakes
    self.executable_addFlips = executable_addFlips
    
    # Per-sample directory layout, one entry per (selection, frWeight, charge).
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          for lepton_charge_selection in self.lepton_charge_selections:
            key_dir = getKey(sample_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection)
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
              initDict(self.dirs, [ key_dir, dir_type ])
              self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                "_".join([ lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection ]), process_name)
    for dir_type in [ DKEY_DCRD, DKEY_PLOT ]:
      initDict(self.dirs, [ dir_type ])
      self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    ##print "self.dirs = ", self.dirs

    if self.applyFakeRateWeights == "3L":
      self.nonfake_backgrounds = [ "TT", "TTW", "TTZ", "EWK", "Rares" ]
      self.prep_dcard_processesToCopy = [ "data_obs", "TT", "TTW", "TTZ", "EWK", "Rares", "fakes_data", "fakes_mc", "flips_data" ]
      self.make_plots_backgrounds = [ "TT", "TTW", "TTZ", "EWK", "Rares", "fakes_data", "flips_data" ]
    elif applyFakeRateWeights == "2lepton":
      # In the '2lepton' scheme, re-label the era-specific MC datasets below
      # as 'background_data_estimate' (excluded from the direct MC estimate)
      # and single out WZ as its own category.
      if era == '2015':
        for sample_name in [
          "/TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v2/MINIAODSIM",
          "/TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/ST_tW_top_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/ST_tW_antitop_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/ST_t-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/ST_t-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext1-v1/MINIAODSIM",
          "/ST_s-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v2/MINIAODSIM",
          "/WWTo2L2Nu_13TeV-powheg/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/ZZTo4L_13TeV_powheg_pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM" ]:
          self.samples[sample_name]["sample_category"] = "background_data_estimate"
        self.samples["/WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM"]["sample_category"] = "WZ"        
      elif era == '2016':
        for sample_name in [ 
          '/TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0_ext1-v1/MINIAODSIM',
          '/TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0_ext1-v1/MINIAODSIM',
          '/TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v4/MINIAODSIM',
          '/TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0_ext1-v1/MINIAODSIM',
          '/DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16RAWAODSIM_reHLT_80X_mcRun2_asymptotic_v14_ext1-v1/MINIAODSIM',
          '/WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISpring16MiniAODv2-PUSpring16RAWAODSIM_reHLT_80X_mcRun2_asymptotic_v14_ext1-v1/MINIAODSIM',
          '/ST_tW_antitop_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/ST_tW_top_5f_NoFullyHadronicDecays_13TeV-powheg_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/ST_t-channel_antitop_4f_inclusiveDecays_13TeV-powhegV2-madspin-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/ST_t-channel_top_4f_inclusiveDecays_13TeV-powhegV2-madspin-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/ST_s-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/WWTo2L2Nu_13TeV-powheg/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/ZZTo4L_13TeV-amcatnloFXFX-pythia8/RunIISpring16MiniAODv2-PUSpring16RAWAODSIM_reHLT_80X_mcRun2_asymptotic_v14-v1/MINIAODSIM' ]:
          self.samples[sample_name]["sample_category"] = "background_data_estimate"
        self.samples["/WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM"]["sample_category"] = "WZ"
      else:
        raise ValueError("Invalid Configuration parameter 'era' = %s !!" % era)
      self.nonfake_backgrounds = [ "TTW", "TTZ", "WZ", "Rares" ]
      self.prep_dcard_processesToCopy = [ "data_obs", "TTW", "TTZ", "WZ", "Rares", "fakes_data", "fakes_mc", "flips_data" ]
      self.make_plots_backgrounds = [ "TTW", "TTZ", "WZ", "Rares", "fakes_data", "flips_data" ]
    else:
      raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % applyFakeRateWeights)
      
    # Config / histogram file locations for the post-processing steps
    # (addBackgrounds, addFakes, addFlips, prepareDatacards, makePlots).
    self.cfgFile_analyze_original = os.path.join(self.workingDir, cfgFile_analyze_original)
    self.cfgFile_addBackgrounds_original = os.path.join(self.workingDir, "addBackgrounds_cfg.py")
    self.cfgFile_addBackgrounds_modified = {}
    self.histogramFile_addBackgrounds = {}
    self.histogramDir_addBackgrounds = {}
    self.process_output_addBackgrounds = {}
    self.histogramFile_hadd_stage1_5 = os.path.join(self.outputDir, DKEY_HIST, "histograms_harvested_stage1_5_%s.root" % self.channel)
    self.cfgFile_addFakes_original = os.path.join(self.workingDir, "addBackgroundLeptonFakes_cfg.py")
    self.cfgFile_addFakes_modified = {}
    self.histogramFile_addFakes = {}
    self.histogramFile_addFlips = os.path.join(self.outputDir, DKEY_HIST, "addBackgroundLeptonFlips_%s.root" % self.channel)
    self.cfgFile_addFlips_original = os.path.join(self.workingDir, "addBackgroundLeptonFlips_cfg.py")
    self.cfgFile_addFlips_modified = os.path.join(self.outputDir, DKEY_CFGS, "addBackgroundLeptonFlips_%s_cfg.py" % self.channel)
    # Datacards are built from the same-sign Tight region; the opposite-sign
    # region is kept for the flip estimate
    self.histogramDir_prep_dcard = "2lss_1tau_SS_Tight"
    self.histogramDir_prep_dcard_OS = "2lss_1tau_OS_Tight"
    self.cfgFile_make_plots_original = os.path.join(self.workingDir, "makePlots_2lss_1tau_cfg.py")
    self.cfgFile_make_plots_mcClosure_original = os.path.join(self.workingDir, "makePlots_mcClosure_cfg.py")
    self.cfgFiles_make_plots_mcClosure_modified = []

    # whether to write out run:lumi:event numbers of selected events
    self.select_rle_output = select_rle_output
예제 #40
0
    def create(self):
        """Create all required config files for Ntuple production and prepare
        running them -- either locally or on the batch system.

        Returns the number of sbatch jobs that were generated (0 when running
        locally via the Makefile only).
        """

        # Make sure the whole directory structure exists before writing anything.
        for dirs_entry in self.dirs.values():
            if type(dirs_entry) == dict:
                for dir_path in dirs_entry.values():
                    create_if_not_exists(dir_path)
            else:
                create_if_not_exists(dirs_entry)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue

            process_name = sample_info["process_name_specific"]
            is_mc = (sample_info["type"] == "mc")

            # Every MC sample must come with a pileup profile.
            if is_mc and process_name not in self.pileup_histograms:
                raise ValueError("Missing PU distribution for %s in file %s" %
                                 (process_name, self.pileup))

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable, process_name))

            inputFileList = generateInputFileList(sample_info,
                                                  self.max_files_per_job)
            key_dir = getKey(sample_name)

            # Output ntuples are bucketed into sub-directories of 1000 jobs each.
            ntuple_subdirs = {
                os.path.join(self.dirs[key_dir][DKEY_NTUPLES],
                             '%04d' % (job_nr // 1000))
                for job_nr in inputFileList.keys()
            }
            for ntuple_subdir in ntuple_subdirs:
                create_if_not_exists(ntuple_subdir)

            for jobId in inputFileList.keys():
                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = inputFileList[jobId]
                if not self.inputFiles[key_file]:
                    # Nothing to process for this job id.
                    logging.warning(
                        "ntupleFiles['%s'] = %s --> skipping job !!" %
                        (key_file, self.inputFiles[key_file]))
                    continue

                self.cfgFiles_prodNtuple_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS],
                    "produceNtuple_%s_%i_cfg.py" % (process_name, jobId))
                self.outputFiles[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_NTUPLES], "%04d" % (jobId // 1000),
                    "tree_%i.root" % jobId)
                self.logFiles_prodNtuple[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_LOGS],
                    "produceNtuple_%s_%i.log" % (process_name, jobId))

                # Data samples apply the HLT paths listed in the sample dict;
                # MC applies none.
                hlt_paths = [] if is_mc else sample_info["hlt_paths"]
                hlt_cuts = list(
                    Triggers(self.era).triggers_flat
                ) if self.preselection_cuts["applyHLTcut"] else []

                self.createCfg_prodNtuple({
                    'inputFiles': self.inputFiles[key_file],
                    'cfgFile_modified':
                    self.cfgFiles_prodNtuple_modified[key_file],
                    'outputFile': self.outputFiles[key_file],
                    'is_mc': is_mc,
                    'random_seed': jobId,
                    'process_name': process_name,
                    'category_name': sample_info["sample_category"],
                    'triggers': hlt_paths,
                    'HLTcuts': hlt_cuts,
                })

        num_jobs = 0
        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable)
            num_jobs = self.createScript_sbatch()
            logging.info("Generated %i job(s)" % num_jobs)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_prodNtuple(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
        return num_jobs
예제 #41
0
    def create(self):
        """Creates all necessary config files and runs the MEM -- either locally or on the batch system

        Returns:
            True if the jobs may be started, False if the total number of MEM
            integrations exceeds self.max_mem_integrations.
        """
        statistics = {}  # per-process integration/event counters, filled below

        # Create the full directory structure up front.
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        # read the file in, sample-by-sample
        # build the dictionary recursively
        # add rle file also to generated cfg files
        # print integrations per job as well!
        # consider more than 1 file per jobs -- the jobs are splitted by MEM integration anyways

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or \
              sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                continue

            # Skip samples whose input Ntuples are not available on disk.
            if not os.path.exists(sample_info['local_paths'][0]['path']):
                logging.warning("Skipping sample {sample_name}".format(
                    sample_name=sample_name))
                continue

            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_addMEM, process_name))
            is_mc = (sample_info["type"] == "mc")

            inputFileList = generateInputFileList(sample_name, sample_info,
                                                  self.max_files_per_job,
                                                  self.debug)
            # typically, the analysis ends here and starts looping b/c the smallest unit of work processes
            # at least one file; we need, however, to split the file into event ranges in such a way that
            # each job performs mem_integrations_per_job MEM integrations

            # so what we are going to do is to open each set of files in inputFileList, read the variable
            # requestMEM_*l_*tau and try to gather the event ranges such that each event range
            # performs up to mem_integrations_per_job integrations per job
            memEvtRangeDict = self.memJobList(inputFileList)

            for jobId in memEvtRangeDict.keys():

                key_dir = getKey(sample_name)
                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = memEvtRangeDict[jobId][
                    'input_fileset']

                # there should always be at least one input file per job
                # bug fix: the original asserted `list > 0`, which never checked
                # the length (always true in Python 2, TypeError in Python 3)
                assert(len(self.inputFiles[key_file]) > 0), \
                    "Job %i of sample %s has no input files !!" % (jobId, sample_name)

                self.cfgFiles_addMEM_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS], "addMEM_%s_%s_%i_cfg.py" %
                    (self.channel, process_name, jobId))
                self.shFiles_addMEM_modified[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS],
                    "addMEM_%s_%s_%i.sh" % (self.channel, process_name, jobId))
                self.outputFiles[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_NTUPLES],
                    "%s_%i.root" % (process_name, jobId))
                self.logFiles_addMEM[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_LOGS], "addMEM_%s_%s_%i.log" %
                    (self.channel, process_name, jobId))
                self.createCfg_addMEM(
                    self.inputFiles[key_file],
                    memEvtRangeDict[jobId]['event_range'][0],
                    memEvtRangeDict[jobId]['event_range'][1],
                    self.outputFiles[key_file],
                    self.era,
                    is_mc,
                    self.cfgFiles_addMEM_modified[key_file],
                )

                # associate the output file with the fileset_id
                # UPDATE: ONE OUTPUT FILE PER SAMPLE!
                fileset_id = memEvtRangeDict[jobId]['fileset_id']
                # bug fix: the original evaluated `'%04d' % fileset_id // 1000`,
                # i.e. (formatted str) // 1000, which raises a TypeError; the
                # intent is to bucket filesets into sub-directories of 1000
                hadd_output_dir = os.path.join(
                    self.dirs[key_dir][DKEY_FINAL_NTUPLES],
                    '%04d' % (fileset_id // 1000))
                if not os.path.exists(hadd_output_dir):
                    os.makedirs(hadd_output_dir)
                hadd_output = os.path.join(
                    hadd_output_dir,
                    '%s_%i.root' % ('tree', fileset_id)
                )
                if hadd_output not in self.hadd_records:
                    self.hadd_records[hadd_output] = {}
                    self.hadd_records[hadd_output]['output_files'] = []
                self.hadd_records[hadd_output]['fileset_id'] = fileset_id
                self.hadd_records[hadd_output]['output_files'].append(
                    self.outputFiles[key_file])
                self.hadd_records[hadd_output]['process_name'] = process_name

            # let's sum the number of integration per sample; count each fileset's
            # entries only once, even when it is split across several jobs
            nofEntriesMap = {}
            for v in memEvtRangeDict.values():
                if v['fileset_id'] not in nofEntriesMap:
                    nofEntriesMap[v['fileset_id']] = {
                        'nof_entries': v['nof_entries'],
                    }
            statistics[process_name] = {
                'nof_int':
                sum(entry['nof_int'] for entry in memEvtRangeDict.values()),
                'nof_entries':
                sum(entry['nof_entries'] for entry in nofEntriesMap.values()),
                'nof_events_pass':
                sum(entry['nof_events_pass']
                    for entry in memEvtRangeDict.values()),
                'nof_int_pass':
                sum(entry['nof_int_pass']
                    for entry in memEvtRangeDict.values()),
                'nof_zero':
                sum(entry['nof_zero'] for entry in memEvtRangeDict.values()),
                'nof_jobs':
                len(memEvtRangeDict),
            }

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_addMEM)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_addMEM(lines_makefile)
        self.addToMakefile_hadd(lines_makefile)
        self.createMakefile(lines_makefile)

        # Print a per-process and total summary; guard the whole section against
        # an empty statistics dict (all samples skipped) and against divisions
        # by zero when nothing passed / no jobs were created.
        total_nof_integrations_sum = 0
        if statistics:
            ws_len = max(len(kk) + 1 for kk in statistics.keys())
            total_nof_integrations_sum = sum(x['nof_int']
                                             for x in statistics.values())
            total_nof_entires = sum(x['nof_entries'] for x in statistics.values())
            total_nof_zero_int = sum(x['nof_zero'] for x in statistics.values())
            total_nof_jobs = sum(x['nof_jobs'] for x in statistics.values())
            total_nof_pass = sum(x['nof_events_pass'] for x in statistics.values())
            total_nof_int_pass_avg = float(
                sum(x['nof_int_pass'] for x in statistics.values())
            ) / total_nof_pass if total_nof_pass else 0.
            total_nof_integrations_avg = float(
                total_nof_integrations_sum) / total_nof_entires \
                if total_nof_entires else 0.
            total_nof_int_per_job = float(
                total_nof_integrations_sum) / total_nof_jobs \
                if total_nof_jobs else 0.
            # .items() instead of Python-2-only .iteritems()
            for k, v in statistics.items():
                if v['nof_entries'] == 0:
                    int_per_event = 0.
                    evt_pass = 0.
                else:
                    int_per_event = float(v['nof_int']) / v['nof_entries']
                    evt_pass = (100 * float(v['nof_events_pass']) /
                                v['nof_entries'])
                if v['nof_events_pass'] == 0:
                    nof_int_pass = 0.
                else:
                    nof_int_pass = float(v['nof_int_pass']) / v['nof_events_pass']
                print(
                    '%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d (%.2f%%) evt pass; %.2f int/evt pass; %d evt 0int)'
                    % (
                        k,
                        ' ' * (ws_len - len(k)),
                        v['nof_int'],
                        v['nof_entries'],
                        v['nof_jobs'],
                        int_per_event,
                        v['nof_events_pass'],
                        evt_pass,
                        nof_int_pass,
                        v['nof_zero'],
                    ))
            print(
                '%s%s: %d (%d entries; %d jobs; %.2f int/evt; %d evt pass; %.2f int/evt pass; '
                '%.2f int/job pass; %d evt 0int)' % (
                    'total',
                    ' ' * (ws_len - len('total')),
                    total_nof_integrations_sum,
                    total_nof_entires,
                    total_nof_jobs,
                    total_nof_integrations_avg,
                    total_nof_pass,
                    total_nof_int_pass_avg,
                    total_nof_int_per_job,
                    total_nof_zero_int,
                ))

        if self.max_mem_integrations > 0 and total_nof_integrations_sum > self.max_mem_integrations:
            logging.error(
                "Will not start the jobs (max nof integrations exceeded)!")
            return False
        else:
            logging.info("Done")
            return True
예제 #42
0
    def create(self):
        """Creates all necessary config files and runs the PU profile production -- either locally or on the batch system

        Returns:
            self.num_jobs -- dict with the number of generated jobs per job type.
        """

        # Create the directory structure first.
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info['use_it']:
                continue

            process_name = sample_info["process_name_specific"]
            is_mc = (sample_info["type"] == "mc")

            # PU profiles are produced for MC samples only.
            if not is_mc:
                continue

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable, process_name))

            inputFileList = generateInputFileList(sample_info,
                                                  self.max_files_per_job)
            key_dir = getKey(process_name)

            outputFile = os.path.join(self.dirs[key_dir][DKEY_HISTO],
                                      "%s.root" % process_name)
            self.outputFiles[process_name] = {
                'inputFiles': [],
                'outputFile': outputFile,
            }
            # Skip samples whose final histogram already exists and looks sane.
            if os.path.isfile(outputFile) and tools_is_file_ok(
                    outputFile, min_file_size=2000):
                logging.info('File {} already exists --> skipping job'.format(
                    outputFile))
                continue

            for jobId in inputFileList.keys():

                key_file = getKey(sample_name, jobId)

                self.inputFiles[key_file] = inputFileList[jobId]
                if len(self.inputFiles[key_file]) == 0:
                    logging.warning("'%s' = %s --> skipping job !!" %
                                    (key_file, self.inputFiles[key_file]))
                    continue

                self.cfgFiles_projection[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS],
                    "project_%s_%i_cfg.txt" % (process_name, jobId))
                self.outputFiles_tmp[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_HISTO_TMP],
                    "histogram_%i.root" % jobId)
                self.logFiles_projection[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_LOGS],
                    "project_%s_%i.log" % (process_name, jobId))
                self.scriptFiles_projection[key_file] = os.path.join(
                    self.dirs[key_dir][DKEY_CFGS],
                    "project_%s_%i_cfg.sh" % (process_name, jobId))
                # The generic 'count' projection module is specialized depending
                # on the sample (top reweighting, HTXS, LHE-level splitting).
                projection_module = self.projection_module
                if projection_module == "count":
                    projection_module = "countHistogramAll"
                    if sample_name.startswith('/TTTo'):
                        projection_module += "CompTopRwgt"
                    elif sample_info['sample_category'].startswith('ttH'):
                        projection_module += "CompHTXS"
                    elif isSplitByNlheJet(process_name):
                        projection_module += "SplitByLHENjet"
                    elif isSplitByNlheHT(process_name):
                        projection_module += "SplitByLHEHT"
                    elif isSplitByNlheJetHT(process_name, sample_name):
                        projection_module += "SplitByLHENjetHT"
                self.jobOptions_sbatch[key_file] = {
                    'histName': process_name,
                    'inputFiles': self.inputFiles[key_file],
                    'cfgFile_path': self.cfgFiles_projection[key_file],
                    'outputFile': self.outputFiles_tmp[key_file],
                    'logFile': self.logFiles_projection[key_file],
                    'scriptFile': self.scriptFiles_projection[key_file],
                    'projection_module': projection_module,
                }
                if self.projection_module != 'puHist':
                    # bug fix: test for the reference weight *before* accessing
                    # it, so a missing process raises the informative
                    # RuntimeError instead of a bare KeyError
                    if process_name not in self.ref_genWeights:
                        raise RuntimeError(
                            "Unable to find reference LHE weight for process %s"
                            % process_name)
                    self.jobOptions_sbatch[key_file][
                        'ref_genWeight'] = self.ref_genWeights[process_name]
                self.createCfg_project(self.jobOptions_sbatch[key_file])
                self.outputFiles[process_name]['inputFiles'].append(
                    self.outputFiles_tmp[key_file])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable)
            self.num_jobs['project'] += self.createScript_sbatch(
                self.executable, self.sbatchFile_projection,
                self.jobOptions_sbatch)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_project(lines_makefile)
        self.addToMakefile_hadd(lines_makefile)
        if self.plot:
            self.addToMakefile_plot(lines_makefile)
        self.addToMakefile_finalHadd(lines_makefile)
        self.createMakefile(lines_makefile)
        logging.info("Done")

        return self.num_jobs
예제 #43
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")
      process_name = sample_info["process_name_specific"]

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          if lepton_frWeight == "disabled" and not lepton_selection in ["Tight", "forBDTtraining"]:
            continue

          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          for leptonChargeSelection in self.leptonChargeSelections:
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_selection, leptonChargeSelection, sample_info
                ):
                  continue
                  
                key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                  if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")   

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for lepton_selection in self.lepton_selections:

      hadTau_selection = "Tight"
      hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2])

      electron_selection = lepton_selection
      muon_selection = lepton_selection

      if lepton_selection == "forBDTtraining":
        electron_selection = "Loose"
        muon_selection = "Loose"
      elif lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      if "forBDTtraining" in lepton_selection:
        electron_selection = "Loose"
        muon_selection = "Loose"
      elif lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for leptonChargeSelection in self.leptonChargeSelections:

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_selection, leptonChargeSelection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_selection, leptonChargeSelection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift)

              for jobId in inputFileList.keys():

                analyze_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                applyFakeRateWeights = self.applyFakeRateWeights \
                  if self.isBDTtraining or lepton_selection.find("Tight") == -1 \
                  else "disabled"

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'leptonChargeSelection'  : leptonChargeSelection,
                  'applyFakeRateWeights'     : applyFakeRateWeights,
                  'hadTauSelection'          : hadTau_selection,
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'fillGenEvtHistograms'     : True,
		          'selectBDT'                : self.isBDTtraining,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'selectBDT'                : self.isBDTtraining,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.isBDTtraining:
              continue

            # add output files of hadd_stage1 to list of input files for hadd_stage1_5
            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
            key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
            hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
            key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

          # BDT-training workflows need no background sums -- skip everything below
          if self.isBDTtraining:
            continue

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_fake, TTW_fake, TTWW_fake, ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          # one "<category>_fake" input process per non-fake background category
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum conversion background contributions for the total of all MC sample
          # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          # one "<category>_Convs" input process per conversion background category.
          # (A rebuild of 'sample_categories' from self.nonfake_backgrounds used to sit
          # here, but it was dead code: the loop below iterates self.convs_backgrounds.)
          processes_input = []
          for sample_category in self.convs_backgrounds:
            processes_input.append("%s_Convs" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          # sum signal contributions from HH->4tau ("tttt"), HH->2W2tau ("wwtt"), and HH->4W ("wwww"),
          # separately for "nonfake" and "fake" contributions
          genMatch_categories = [ "nonfake", "fake" ]
          for genMatch_category in genMatch_categories:
            for signal_base, signal_input in self.signal_io.items():
              addBackgrounds_job_signal_tuple = (lepton_selection_and_frWeight, leptonChargeSelection, signal_base, genMatch_category)
              key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple)
              # the same (selection, charge, signal, genMatch) job may be reached more
              # than once across outer-loop iterations -- configure it only once
              if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys():
                continue
              processes_input = signal_input
              process_output = signal_base
              # for the "fake" category, suffix both the inputs and the output process
              if genMatch_category == "fake":
                processes_input = [ process_input + "_fake" for process_input in processes_input ]
                process_output += "_fake"
              self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = {
                'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple),
                'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple),
                'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple),
                'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
                'processes_input' : processes_input,
                'process_output' : process_output
              }
              self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal])
              # only the Tight selection feeds the summed signal into hadd_stage2
              key_hadd_stage2_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
              if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
              if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile'])

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
          key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
          hadd_stage2_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          # fakes_mc and Convs sums contribute only in the Tight signal region;
          # the hadd_stage1_5 merge is an input for every selection
          if lepton_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    # early exit for BDT-training workflows: only the analyze and hadd_stage1
    # Makefile targets are produced (no datacards, systematics or plots)
    if self.isBDTtraining:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_analyze(lines_makefile)
      self.addToMakefile_hadd_stage1(lines_makefile)
      # register the extra phony targets before validation is appended
      self.targets.extend(self.phoniesToAdd)
      self.addToMakefile_validate(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    # data-driven fake background: use the FR-weighted Fakeable region as sideband
    # for the Tight signal-region category, once per lepton charge selection
    for leptonChargeSelection in self.leptonChargeSelections:
      key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), leptonChargeSelection)
      key_addFakes_dir = getKey("addBackgroundLeptonFakes")
      key_addFakes_job = getKey("data_fakes", leptonChargeSelection)
      category_sideband = "hh_4l_%s_Fakeable_wFakeRateWeights" % leptonChargeSelection
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % leptonChargeSelection),
        'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % leptonChargeSelection),
        'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % leptonChargeSelection),
        'category_signal' : "hh_4l_%s_Tight" % leptonChargeSelection,
        'category_sideband' : category_sideband
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      # NOTE(review): assumes the (Tight, disabled) hadd_stage2 entry was created in
      # the loops above -- raises KeyError otherwise; confirm "Tight" is always run
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection)
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    # one prepareDatacards job per histogram to fit and per charge selection;
    # the OS and SS branches differ only in histogram directory and label
    for histogramToFit in self.histograms_to_fit:
      key_prep_dcard_dir = getKey("prepareDatacards")
      if "OS" in self.leptonChargeSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
        prep_dcard_job_tuple = (self.channel, "OS", histogramToFit)
        key_prep_dcard_job = getKey("OS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard,
          'histogramToFit' : histogramToFit,
          'label' : '4l',
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      if "SS" in self.leptonChargeSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
        prep_dcard_job_tuple = (self.channel, "SS", histogramToFit)
        key_prep_dcard_job = getKey("SS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard_SS,
          'histogramToFit' : histogramToFit,
          'label' : '4l SS',
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      for leptonChargeSelection in self.leptonChargeSelections:
        key_prep_dcard_job = getKey(leptonChargeSelection, histogramToFit)
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection)
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, leptonChargeSelection, histogramToFit)
        key_add_syst_fakerate_job = getKey(leptonChargeSelection, histogramToFit)        
        # the addSystFakeRates job post-processes the datacard produced above
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
          'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
          'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
          'category' : self.channel,
          'histogramToFit' : histogramToFit,
          'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png")
        }
        # pick the nominal histogram directory matching the charge selection
        histogramDir_nominal = None
        if leptonChargeSelection == "OS":
          histogramDir_nominal = self.histogramDir_prep_dcard
        elif leptonChargeSelection == "SS":
          histogramDir_nominal = self.histogramDir_prep_dcard_SS
        else:
          raise ValueError("Invalid parameter 'leptonChargeSelection' = %s !!" % leptonChargeSelection)
        # MC-closure inputs per lepton flavour, only if that selection was enabled
        for lepton_type in [ 'e', 'm' ]:
          lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
          if lepton_mcClosure not in self.lepton_selections:
            continue
          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
          key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection)
          histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, leptonChargeSelection)]
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
            # NOTE(review): this membership test is always True here -- the 'continue'
            # above already skipped the lepton types not in self.lepton_selections
            'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
            'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
            'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
            'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
          })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    # control plots: one job per enabled charge selection, plus an optional
    # MC-closure plot job (currently hard-wired to the OS stage2 output)
    key_makePlots_dir = getKey("makePlots")
    if "OS" in self.leptonChargeSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : '4l',
        'make_plots_backgrounds' : self.make_plots_backgrounds,
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.leptonChargeSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")                  
      key_makePlots_job = getKey("SS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard_SS,
        'label' : "4l SS",
        'make_plots_backgrounds' : self.make_plots_backgrounds,
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_selections: #TODO
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("Fakeable_mcClosure", "OS")      
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    # when running on the batch system, emit one sbatch submission script per
    # executable (analyze, addBackgrounds, addBackgrounds_sum, addFakes)
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      # the "sum" jobs reuse the addBackgrounds executable with their own job options
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    # assemble the full workflow Makefile in dependency order:
    # analyze -> hadd_stage1 -> data-driven backgrounds -> hadd_stage2
    # -> datacards -> fake-rate systematics -> plots -> validation
    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    # total number of jobs configured by this call (accumulated elsewhere)
    return self.num_jobs