Exemplo n.º 1
0
  def __init__(self, outputDir, executable_analyze, samples, lepton_selections, central_or_shifts,
               max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
               histograms_to_fit = [], select_rle_output = False, executable_prep_dcard="prepareDatacard"):
    analyzeConfig.__init__(self, outputDir, executable_analyze, "charge_flip", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
      histograms_to_fit)

    self.samples = samples
    
    self.prep_dcard_processesToCopy = ["data_obs", "DY", "DY_fake", "WJets", "TTbar", "Singletop", "Diboson"]
    self.prep_dcard_signals = [ "DY" ]
    
    self.lepton_selections = lepton_selections
    
    #self.hadTau_selection = hadTau_selection

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_selection in self.lepton_selections:
          key_dir = getKey(sample_name, lepton_selection)  
          for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_RLES  ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
              "_".join([ lepton_selection ]), process_name)
    ##print "self.dirs = ", self.dirs

    self.cfgFile_analyze_original = os.path.join(self.workingDir, "analyze_charge_flip_cfg.py")
    self.cfgFile_prep_dcard_original = os.path.join(self.workingDir, "prepareDatacards_cfg.py")
    #self.histogramDir_prep_dcard = "charge_flip_SS_Tight"
    self.select_rle_output = select_rle_output
Exemplo n.º 2
0
    def __init__(self, treeName, outputDir, executable_addMEM, samples, era, debug, running_method,
                 max_files_per_job, mem_integrations_per_job, max_mem_integrations, num_parallel_jobs):

        self.treeName = treeName
        self.outputDir = outputDir
        self.executable_addMEM = executable_addMEM
        self.channel = "2lss_1tau"
        self.mem_integrations_per_job = mem_integrations_per_job
        self.max_files_per_job = max_files_per_job
        self.max_mem_integrations = max_mem_integrations
        self.samples = samples
        self.era = era
        self.debug = debug
        assert(running_method.lower() in [
          "sbatch", "makefile"]), "Invalid running method: %s" % running_method
        self.running_method = running_method
        self.is_sbatch = False
        self.is_makefile = False
        if self.running_method.lower() == "sbatch":
            self.is_sbatch = True
        else:
            self.is_makefile = True
        self.makefile = os.path.join(
          self.outputDir, "Makefile_%s" % self.channel)
        self.num_parallel_jobs = num_parallel_jobs

        self.workingDir = os.getcwd()
        print "Working directory is: " + self.workingDir

        create_if_not_exists(self.outputDir)
        self.stdout_file = codecs.open(os.path.join(
          self.outputDir, "stdout_%s.log" % self.channel), 'w', 'utf-8')
        self.stderr_file = codecs.open(os.path.join(
          self.outputDir, "stderr_%s.log" % self.channel), 'w', 'utf-8')
        self.dirs = {}
        self.samples = samples
        self.cfgFile_addMEM_original = os.path.join(self.workingDir, "addMEM_2lss_1tau_cfg.py")
        self.cfgFiles_addMEM_modified = {}
        self.logFiles_addMEM = {}
        self.sbatchFile_addMEM = os.path.join(
          self.outputDir, "sbatch_addMEM_%s.py" % self.channel)
        self.inputFiles = {}
        self.outputFiles = {}
        self.hadd_records = {}
        self.filesToClean = []

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [ DKEY_CFGS, DKEY_NTUPLES, DKEY_FINAL_NTUPLES, DKEY_LOGS, DKEY_HADD ]:
                initDict(self.dirs, [ key_dir, dir_type ])
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, process_name)

        self.cvmfs_error_log = {}
Exemplo n.º 3
0
  def __init__(self, configDir, outputDir, executable_analyze, samples, lepton_selections, central_or_shifts,
               max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
               histograms_to_fit = [], select_rle_output = False, executable_prep_dcard="prepareDatacard"):
    analyzeConfig.__init__(self, configDir, outputDir, executable_analyze, "charge_flip", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
      histograms_to_fit)

    self.samples = samples
    
    self.prep_dcard_processesToCopy = ["data_obs", "DY", "DY_fake", "WJets", "TTbar", "Singletop", "Diboson"]
    self.prep_dcard_signals = [ "DY" ]
    
    self.lepton_selections = lepton_selections
    
    #self.hadTau_selection = hadTau_selection

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]


      # Edit Siddh ~~~~~~~~~~~~~     
      run_process = False
      for sprocess_run in sample_process_run_s:
        if sprocess_run == process_name:
          run_process = True
          #print "Run process: ", sprocess_run 

      if run_process == False: 
        continue
        #print "run_process:",process_name                                                                                                                                       
      # ~~~~~~~~~~~~~~~~~~~~~~~~~~                                                                                                                                             

      for lepton_selection in self.lepton_selections:
          key_dir = getKey(sample_name, lepton_selection)  
          for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_RLES  ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
              self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                "_".join([ lepton_selection ]), process_name)
            else:
              self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                "_".join([ lepton_selection ]), process_name)
    ##print "self.dirs = ", self.dirs

    self.cfgFile_analyze_original = os.path.join(self.workingDir, "analyze_charge_flip_cfg.py")
    self.cfgFile_prep_dcard_original = os.path.join(self.workingDir, "prepareDatacards_cfg.py")
    #self.histogramDir_prep_dcard = "charge_flip_SS_Tight"
    self.select_rle_output = select_rle_output
  def __init__(self, outputDir, executable_analyze, samples, charge_selections,
               jet_minPt, jet_maxPt, jet_minAbsEta, jet_maxAbsEta, hadTau_selections, absEtaBins, ptBins, central_or_shifts,
               max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs,
               executable_comp_jetToTauFakeRate):
    analyzeConfig.__init__(self, outputDir, executable_analyze, "jetToTauFakeRate", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs,
      [])

    self.samples = samples

    self.charge_selections = charge_selections

    self.jet_minPt = jet_minPt
    self.jet_maxPt = jet_maxPt
    self.jet_minAbsEta = jet_minAbsEta
    self.jet_maxAbsEta = jet_maxAbsEta

    self.hadTau_selections = hadTau_selections
    
    self.absEtaBins = absEtaBins
    self.ptBins = ptBins

    self.executable_comp_jetToTauFakeRate = executable_comp_jetToTauFakeRate
    
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for charge_selection in self.charge_selections:
        key_dir = getKey(sample_name, charge_selection)  
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD ]:
          initDict(self.dirs, [ key_dir, dir_type ])
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
            "_".join([ charge_selection ]), process_name)
    ##print "self.dirs = ", self.dirs

    self.cfgFile_analyze_original = os.path.join(self.workingDir, "analyze_jetToTauFakeRate_cfg.py")
    self.histogramFile_comp_jetToTauFakeRate = {}
    self.cfgFile_comp_jetToTauFakeRate_original = os.path.join(self.workingDir, "comp_jetToTauFakeRate_cfg.py")
    self.cfgFile_comp_jetToTauFakeRate_modified = {}    
    self.histogramDir_numerator = {}
    self.histogramDir_denominator = {}
Exemplo n.º 5
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          central_or_shifts_extended = [ "" ]
          central_or_shifts_extended.extend(self.central_or_shifts)
          central_or_shifts_extended.extend([ "hadd", "addBackgrounds" ])
          for central_or_shift_or_dummy in central_or_shifts_extended:
            process_name_extended = [ process_name, "hadd" ]
            for process_name_or_dummy in process_name_extended:
              key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, central_or_shift_or_dummy)
              for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_RLES, DKEY_SYNC ]:
                initDict(self.dirs, [ key_dir, dir_type ])
                if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                  self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                    "_".join([ lepton_selection_and_frWeight ]), process_name_or_dummy, central_or_shift_or_dummy)
                else:
                  self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                    "_".join([ lepton_selection_and_frWeight ]), process_name_or_dummy, central_or_shift_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')
    for lepton_selection in self.lepton_selections:
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      hadTauVeto_selection = "Tight"
      hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])

      if lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for sample_name, sample_info in self.samples.items():
          if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
          process_name = sample_info["process_name_specific"]
          logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

          sample_category = sample_info["sample_category"]
          is_mc = (sample_info["type"] == "mc")
          is_signal = (sample_category == "signal")

          for central_or_shift in self.central_or_shifts:

            inputFileList = inputFileLists[sample_name]
            for jobId in inputFileList.keys():
              if central_or_shift != "central":
                isFR_shape_shift = (central_or_shift in systematics.FR_all)
                if not ((lepton_selection == "Fakeable" and isFR_shape_shift) or lepton_selection == "Tight"):
                  continue
                if not is_mc and not isFR_shape_shift:
                  continue

              if central_or_shift in systematics.LHE().ttH and sample_category != "signal":
                continue
              if central_or_shift in systematics.LHE().ttW and sample_category != "TTW":
                continue
              if central_or_shift in systematics.LHE().ttZ and sample_category != "TTZ":
                continue
              if central_or_shift in systematics.DYMCReweighting and not is_dymc_reweighting(sample_name):
                continue

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, central_or_shift)
              analyze_job_tuple = (process_name, lepton_selection_and_frWeight, central_or_shift, jobId)
              key_analyze_job = getKey(*analyze_job_tuple)
              ntupleFiles = inputFileList[jobId]
              if len(ntupleFiles) == 0:
                logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                continue

              syncOutput = ''
              syncTree = ''
              syncRequireGenMatching = True
              if self.do_sync:
                mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                if lepton_selection_and_frWeight == 'Tight':
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                  syncTree = 'syncTree_%s_SR' % self.channel
                  syncRequireGenMatching = True
                elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights':
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                  syncTree = 'syncTree_%s_Fake' % self.channel
                elif mcClosure_match:
                  mcClosure_type = mcClosure_match.group('type')
                  syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                  syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel, mcClosure_type)
                else:
                  continue
              if syncTree and central_or_shift != "central":
                syncTree = os.path.join(central_or_shift, syncTree)
              syncRLE = ''
              if self.do_sync and self.rle_select:
                syncRLE = self.rle_select % syncTree
                if not os.path.isfile(syncRLE):
                  logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                  continue
              if syncOutput:
                self.inputFiles_sync['sync'].append(syncOutput)

              cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
              logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
              rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                   if self.select_rle_output else ""
              histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)

              self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles'              : ntupleFiles,
                'cfgFile_modified'         : cfgFile_modified_path,
                'histogramFile'            : histogramFile_path,
                'logFile'                  : logFile_path,
                'selEventsFileName_output' : rleOutputFile_path,
                'electronSelection'        : electron_selection,
                'muonSelection'            : muon_selection,
                'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                'hadTauSelection_veto'     : hadTauVeto_selection,
                'applyFakeRateWeights'     : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                'central_or_shift'         : central_or_shift,
                'syncOutput'               : syncOutput,
                'syncTree'                 : syncTree,
                'syncRLE'                  : syncRLE,
                'syncRequireGenMatching'   : syncRequireGenMatching,
                'useNonNominal'            : self.use_nonnominal,
                'apply_hlt_filter'         : self.hlt_filter,
              }
              self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

              # initialize input and output file names for hadd_stage1
              key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight)
              hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight)
              key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
              if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
              self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
              self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                              "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)

          if self.do_sync: continue

          if is_mc:
            logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)

            sample_categories = [ sample_category ]
            if is_signal:
              sample_categories = [ "signal", "ttH", "ttH_htt", "ttH_hww", "ttH_hzz", "ttH_hmm", "ttH_hzg" ]
            for sample_category in sample_categories:
              # sum non-fake and fake contributions for each MC sample separately
              genMatch_categories = [ "nonfake", "conversions", "fake" ]
              for genMatch_category in genMatch_categories:
                key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
                key_addBackgrounds_dir = getKey(process_name, lepton_selection_and_frWeight, "addBackgrounds")
                addBackgrounds_job_tuple = None
                processes_input = None
                process_output = None
                if genMatch_category == "nonfake":
                  # sum non-fake contributions for each MC sample separately
                  # input processes: TT3l0g0j,...
                  # output processes: TT; ...
                  if sample_category in [ "signal" ]:
                    lepton_genMatches = []
                    lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                    lepton_genMatches.extend(self.lepton_genMatches_fakes)
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in lepton_genMatches ]
                  elif sample_category in [ "ttH" ]:
                    lepton_genMatches = []
                    lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                    lepton_genMatches.extend(self.lepton_genMatches_conversions)
                    processes_input = []
                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in lepton_genMatches ])
                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in lepton_genMatches ])
                  else:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_nonfakes ]
                  process_output = sample_category
                  addBackgrounds_job_tuple = (process_name, sample_category, lepton_selection_and_frWeight)
                elif genMatch_category == "conversions":
                  # sum fake contributions for each MC sample separately
                  # input processes: TT2l1g0j, TT1l2g0j, TT0l3g0j; ...
                  # output processes: TT_conversion; ...
                  if sample_category in [ "signal" ]:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_conversions ]
                  elif sample_category in [ "ttH" ]:
                    processes_input = []
                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_conversions ])
                  else:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_conversions ]
                  process_output = "%s_conversion" % sample_category
                  addBackgrounds_job_tuple = (process_name, "%s_conversion" % sample_category, lepton_selection_and_frWeight)
                elif genMatch_category == "fake":
                  # sum fake contributions for each MC sample separately
                  # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l2g1j, TT0l1g2j, TT0l0g3j; ...
                  # output processes: TT_fake; ...
                  if sample_category in [ "signal" ]:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                  elif sample_category in [ "ttH" ]:
                    processes_input = []
                    processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                  else:
                    processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                  process_output = "%s_fake" % sample_category
                  addBackgrounds_job_tuple = (process_name, "%s_fake" % sample_category, lepton_selection_and_frWeight)
                if processes_input:
                  logging.info(" ...for genMatch option = '%s'" % genMatch_category)
                  key_addBackgrounds_job = getKey(*addBackgrounds_job_tuple)
                  cfgFile_modified = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_tuple)
                  outputFile = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_tuple)
                  self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job],
                    'cfgFile_modified' : cfgFile_modified,
                    'outputFile' : outputFile,
                    'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
                    'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
                    'processes_input' : processes_input,
                    'process_output' : process_output
                  }
                  self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                  # initialize input and output file names for hadd_stage1_5
                  key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight)                  
                  key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
                  if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                  self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                  self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                                      "hadd_stage1_5_%s.root" % lepton_selection_and_frWeight)

          # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
          if not is_mc:
            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight)
            key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

        if self.do_sync: continue

        # sum fake background contributions for the total of all MC sample
        # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l3j, TT0l3j, TT0l3j, TT0l3j; ...
        # output process: fakes_mc
        key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
        key_addBackgrounds_dir = getKey("addBackgrounds")
        addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight)
        key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
        sample_categories = []
        sample_categories.extend(self.nonfake_backgrounds)
        sample_categories.extend([ "signal" ])
        processes_input = []
        for sample_category in sample_categories:
          processes_input.append("%s_fake" % sample_category)
        self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
          'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_job_fakes_tuple),
          'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_job_fakes_tuple),
          'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
          'processes_input' : processes_input,
          'process_output' : "fakes_mc"
        }
        self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

        # sum conversion background contributions for the total of all MC sample
        # input processes: TT2l0g1j, TT1l1g1j, TT1l0g2j, TT0l3j, TT0l3j, TT0l3j, TT0l3j; ...
        # output process: conversions
        addBackgrounds_job_conversions_tuple = ("conversions", lepton_selection_and_frWeight)
        key_addBackgrounds_job_conversions = getKey(*addBackgrounds_job_conversions_tuple)
        sample_categories = []
        sample_categories.extend(self.nonfake_backgrounds)
        sample_categories.extend([ "signal" ])
        processes_input = []
        for sample_category in sample_categories:
          processes_input.append("%s_conversion" % sample_category)
        self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_cfg.py" % addBackgrounds_job_conversions_tuple),
          'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s.root" % addBackgrounds_job_conversions_tuple),
          'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s.log" % addBackgrounds_job_conversions_tuple),
          'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight) ],
          'processes_input' : processes_input,
          'process_output' : "conversions"
        }
        self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions])

        # initialize input and output file names for hadd_stage2
        key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight)
        key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight)        
        key_hadd_stage2_job = getKey(lepton_selection_and_frWeight)
        if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
          self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
        if lepton_selection == "Tight":
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_conversions]['outputFile'])        
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
        self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                        "hadd_stage2_%s.root" % lepton_selection_and_frWeight)

    if self.do_sync:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_syncNtuple(lines_makefile)
      outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
      self.outputFile_sync['sync'] = outputFile_sync_path
      self.targets.append(outputFile_sync_path)
      self.addToMakefile_hadd_sync(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"))
    key_addFakes_job = getKey("fakes_data")
    category_sideband = "ttZctrl_Fakeable_wFakeRateWeights"
    self.jobOptions_addFakes[key_addFakes_job] = {
      'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
      'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgroundLeptonFakes_cfg.py"),
      'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgroundLeptonFakes.root"),
      'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgroundLeptonFakes.log"),
      'category_signal' : "ttZctrl_Tight",
      'category_sideband' : category_sideband
    }
    self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
      key_prep_dcard_dir = getKey("prepareDatacards")
      prep_dcard_job_tuple = (self.channel, histogramToFit)
      key_prep_dcard_job = getKey(histogramToFit)      
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
        'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple),
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      key_prep_dcard_job = getKey(histogramToFit)
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
      key_add_syst_fakerate_dir = getKey("addSystFakeRates")                                    
      add_syst_fakerate_job_tuple = (self.channel, histogramToFit) 
      key_add_syst_fakerate_job = getKey(histogramToFit)      
      self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
        'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
        'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
        'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s.root" % add_syst_fakerate_job_tuple),
        'category' : self.channel,
        'histogramToFit' : histogramToFit,
        'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png")
      }
      histogramDir_nominal = self.histogramDir_prep_dcard
      for lepton_type in [ 'e', 'm' ]:
        lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
        if lepton_mcClosure not in self.lepton_selections:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
        key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight)
        histogramDir_mcClosure = self.mcClosure_dir[lepton_mcClosure]
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
          'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
          'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
          'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
          'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
        })
      self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"))
    key_makePlots_dir = getKey("makePlots")                                       
    key_makePlots_job = getKey('')
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable' : self.executable_make_plots,
      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
      'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
      'histogramDir' : self.histogramDir_prep_dcard,
      'label' : "t#bar{t}Z control region",
      'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
Exemplo n.º 6
0
    def __init__(self,
                 configDir,
                 outputDir,
                 executable_prodNtuple,
                 cfgFile_prodNtuple,
                 samples,
                 max_files_per_job,
                 era,
                 preselection_cuts,
                 leptonSelection,
                 hadTauSelection,
                 debug,
                 running_method,
                 version,
                 num_parallel_jobs,
                 pool_id=''):

        self.configDir = configDir
        self.outputDir = outputDir
        self.executable_prodNtuple = executable_prodNtuple
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.preselection_cuts = preselection_cuts
        self.leptonSelection = leptonSelection
        self.hadTauSelection = hadTauSelection
        self.debug = debug
        assert (running_method.lower()
                in ["sbatch",
                    "makefile"]), "Invalid running method: %s" % running_method
        self.running_method = running_method
        self.is_sbatch = False
        self.is_makefile = False
        if self.running_method.lower() == "sbatch":
            self.is_sbatch = True
        else:
            self.is_makefile = True
        self.makefile = os.path.join(self.configDir, "Makefile_prodNtuple")
        self.num_parallel_jobs = num_parallel_jobs
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        print "Working directory is: " + self.workingDir

        self.version = version

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.outputDir)
        self.stdout_file = codecs.open(
            os.path.join(self.configDir, "stdout_prodNtuple.log"), 'w',
            'utf-8')
        self.stderr_file = codecs.open(
            os.path.join(self.configDir, "stderr_prodNtuple.log"), 'w',
            'utf-8')
        self.dirs = {}
        self.samples = samples
        self.cfgFile_prodNtuple_original = os.path.join(
            self.workingDir, cfgFile_prodNtuple)
        self.cfgFiles_prodNtuple_modified = {}
        self.logFiles_prodNtuple = {}
        self.sbatchFile_prodNtuple = os.path.join(self.configDir,
                                                  "sbatch_prodNtuple.py")
        self.inputFiles = {}
        self.outputFiles = {}
        self.filesToClean = []

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, process_name)
        for dir_type in [DKEY_CFGS, DKEY_LOGS]:
            initDict(self.dirs, [dir_type])
            if dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type)
        ##print "self.dirs = ", self.dirs

        self.cvmfs_error_log = {}
Exemplo n.º 7
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)

        numDirectories = 0
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                numDirectories += len(self.dirs[key])
            else:
                numDirectories += 1
        logging.info("Creating directory structure (numDirectories = %i)" %
                     numDirectories)
        numDirectories_created = 0
        frac = 1
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
                numDirectories_created += len(self.dirs[key])
            else:
                create_if_not_exists(self.dirs[key])
                numDirectories_created = numDirectories_created + 1
            while 100 * numDirectories_created >= frac * numDirectories:
                logging.info(" %i%% completed" % frac)
                frac = frac + 1
        logging.info("Done.")

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            inputFileList = inputFileLists[sample_name]
            for jobId in inputFileList.keys():
                ##print "processing sample %s: jobId = %i" % (process_name, jobId)

                # build config files for executing analysis code
                key_analyze_dir = getKey(process_name)
                analyze_job_tuple = (process_name, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                    logging.warning(
                        "No input ntuples for %s --> skipping job !!" %
                        (key_analyze_job))
                    continue

                cfgFile_modified_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_CFGS],
                    "analyze_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_LOGS],
                    "analyze_%s_%i.log" % analyze_job_tuple)
                histogramFile_path = os.path.join(
                    self.dirs[key_analyze_dir][DKEY_HIST],
                    "analyze_%s_%i.root" % analyze_job_tuple)

                self.jobOptions_analyze[key_analyze_job] = {
                    'ntupleFiles': ntupleFiles,
                    'cfgFile_modified': cfgFile_modified_path,
                    'histogramFile': histogramFile_path,
                    'histogramDir': 'analyze_hadTopTagger',
                    'logFile': logFile_path,
                    'hadTauSelection': self.hadTau_selection,
                    'lumiScale': 1.,
                    'selectBDT': True,
                }
                self.createCfg_analyze(
                    self.jobOptions_analyze[key_analyze_job], sample_info)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name,
                                             lepton_selection_and_frWeight)
                key_hadd_stage1_job = getKey(process_name)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                    self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(
                    self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[
                    key_hadd_stage1_job] = os.path.join(
                        self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                        "hadd_stage1_%s.root" % process_name)
                self.targets.append(
                    self.outputFile_hadd_stage1[key_hadd_stage1_job])

        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done.")

        return self.num_jobs
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")
      process_name = sample_info["process_name_specific"]

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          if lepton_frWeight == "disabled" and not lepton_selection in ["Tight", "forBDTtraining"]:
            continue

          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          for leptonChargeSelection in self.leptonChargeSelections:
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_selection, leptonChargeSelection, sample_info
                ):
                  continue
                  
                key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                  if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")   

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for lepton_selection in self.lepton_selections:

      hadTau_selection = "Tight"
      hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2])

      electron_selection = lepton_selection
      muon_selection = lepton_selection

      if lepton_selection == "forBDTtraining":
        electron_selection = "Loose"
        muon_selection = "Loose"
      elif lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      if "forBDTtraining" in lepton_selection:
        electron_selection = "Loose"
        muon_selection = "Loose"
      elif lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for leptonChargeSelection in self.leptonChargeSelections:

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_selection, leptonChargeSelection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_selection, leptonChargeSelection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift)

              for jobId in inputFileList.keys():

                analyze_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                applyFakeRateWeights = self.applyFakeRateWeights \
                  if self.isBDTtraining or lepton_selection.find("Tight") == -1 \
                  else "disabled"

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'leptonChargeSelection'  : leptonChargeSelection,
                  'applyFakeRateWeights'     : applyFakeRateWeights,
                  'hadTauSelection'          : hadTau_selection,
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'fillGenEvtHistograms'     : True,
		          'selectBDT'                : self.isBDTtraining,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'selectBDT'                : self.isBDTtraining,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.isBDTtraining:
              continue

            # add output files of hadd_stage1 to list of input files for hadd_stage1_5
            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
            key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
            hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
            key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

          if self.isBDTtraining:
            continue

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_fake, TTW_fake, TTWW_fake, ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum conversion background contributions for the total of all MC sample
          # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          processes_input = []
          for sample_category in self.convs_backgrounds:
            processes_input.append("%s_Convs" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          # sum signal contributions from HH->4tau ("tttt"), HH->2W2tau ("wwtt"), and HH->4W ("wwww"),
          # separately for "nonfake" and "fake" contributions
          genMatch_categories = [ "nonfake", "fake" ]
          for genMatch_category in genMatch_categories:
            for signal_base, signal_input in self.signal_io.items():
              addBackgrounds_job_signal_tuple = (lepton_selection_and_frWeight, leptonChargeSelection, signal_base, genMatch_category)
              key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple)
              if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys():
                continue
              processes_input = signal_input
              process_output = signal_base
              if genMatch_category == "fake":
                processes_input = [ process_input + "_fake" for process_input in processes_input ]
                process_output += "_fake"
              self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = {
                'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple),
                'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple),
                'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple),
                'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, leptonChargeSelection) ],
                'processes_input' : processes_input,
                'process_output' : process_output
              }
              self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal])
              key_hadd_stage2_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
              if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
              if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile'])

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
          key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
          hadd_stage2_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])          
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    if self.isBDTtraining:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_analyze(lines_makefile)
      self.addToMakefile_hadd_stage1(lines_makefile)
      self.targets.extend(self.phoniesToAdd)
      self.addToMakefile_validate(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for leptonChargeSelection in self.leptonChargeSelections:
      key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), leptonChargeSelection)
      key_addFakes_dir = getKey("addBackgroundLeptonFakes")
      key_addFakes_job = getKey("data_fakes", leptonChargeSelection)
      category_sideband = "hh_4l_%s_Fakeable_wFakeRateWeights" % leptonChargeSelection
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % leptonChargeSelection),
        'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % leptonChargeSelection),
        'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % leptonChargeSelection),
        'category_signal' : "hh_4l_%s_Tight" % leptonChargeSelection,
        'category_sideband' : category_sideband
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection)
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      key_prep_dcard_dir = getKey("prepareDatacards")
      if "OS" in self.leptonChargeSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
        prep_dcard_job_tuple = (self.channel, "OS", histogramToFit)
        key_prep_dcard_job = getKey("OS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard,
          'histogramToFit' : histogramToFit,
          'label' : '4l',
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      if "SS" in self.leptonChargeSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
        prep_dcard_job_tuple = (self.channel, "SS", histogramToFit)
        key_prep_dcard_job = getKey("SS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard_SS,
          'histogramToFit' : histogramToFit,
          'label' : '4l SS',
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      for leptonChargeSelection in self.leptonChargeSelections:
        key_prep_dcard_job = getKey(leptonChargeSelection, histogramToFit)
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection)
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, leptonChargeSelection, histogramToFit)
        key_add_syst_fakerate_job = getKey(leptonChargeSelection, histogramToFit)        
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
          'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
          'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
          'category' : self.channel,
          'histogramToFit' : histogramToFit,
          'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = None
        if leptonChargeSelection == "OS":
          histogramDir_nominal = self.histogramDir_prep_dcard
        elif leptonChargeSelection == "SS":
          histogramDir_nominal = self.histogramDir_prep_dcard_SS
        else:
          raise ValueError("Invalid parameter 'leptonChargeSelection' = %s !!" % leptonChargeSelection)
        for lepton_type in [ 'e', 'm' ]:
          lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
          if lepton_mcClosure not in self.lepton_selections:
            continue
          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
          key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection)
          histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, leptonChargeSelection)]
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
            'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
            'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
            'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
            'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
          })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_dir = getKey("makePlots")
    if "OS" in self.leptonChargeSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : '4l',
        'make_plots_backgrounds' : self.make_plots_backgrounds,
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.leptonChargeSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")                  
      key_makePlots_job = getKey("SS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard_SS,
        'label' : "4l SS",
        'make_plots_backgrounds' : self.make_plots_backgrounds,
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_selections: #TODO
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("Fakeable_mcClosure", "OS")      
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
Exemplo n.º 9
0
  def __init__(self, outputDir, executable_analyze, cfgFile_analyze_original, samples, hadTau_selection, hadTau_charge_selections, applyFakeRateWeights, central_or_shifts,
               max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
               executable_addBackgrounds, executable_addBackgroundJetToTauFakes, histograms_to_fit, select_rle_output = False, executable_prep_dcard="prepareDatacard"):
    analyzeConfig.__init__(self, outputDir, executable_analyze, "1l_2tau", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
      histograms_to_fit)

    self.samples = samples
    
    self.lepton_and_hadTau_selections = [ "Tight", "Fakeable", "Fakeable_mcClosure" ]
    self.lepton_and_hadTau_frWeights = [ "enabled", "disabled" ]
    self.hadTau_selection_part2 = hadTau_selection
    self.hadTau_charge_selections = hadTau_charge_selections
    self.applyFakeRateWeights = applyFakeRateWeights
        
    self.lepton_genMatches = [ "1l0j", "0l1j" ]
    self.hadTau_genMatches = [ "2t0e0m0j", "1t1e0m0j", "1t0e1m0j", "1t0e0m1j", "0t2e0m0j", "0t1e1m0j", "0t1e0m1j", "0t0e2m0j", "0t0e1m1j", "0t0e0m2j" ]
    
    self.apply_leptonGenMatching = None
    self.apply_hadTauGenMatching = None
    self.lepton_and_hadTau_genMatches_nonfakes = []
    self.lepton_and_hadTau_genMatches_fakes = []
    if self.applyFakeRateWeights == "3L":
      self.apply_leptonGenMatching = True
      self.apply_hadTauGenMatching = True
      for lepton_genMatch in self.lepton_genMatches:
        for hadTau_genMatch in self.hadTau_genMatches:
          lepton_and_hadTau_genMatch = "&".join([ lepton_genMatch, hadTau_genMatch ])
          if lepton_genMatch.endswith("0j") and hadTau_genMatch.endswith("0j"):
            self.lepton_and_hadTau_genMatches_nonfakes.append(lepton_and_hadTau_genMatch)
          else:
            self.lepton_and_hadTau_genMatches_fakes.append(lepton_and_hadTau_genMatch)
    elif applyFakeRateWeights == "2tau":
      self.apply_leptonGenMatching = False
      self.apply_hadTauGenMatching = True
      for hadTau_genMatch in self.hadTau_genMatches:
        if hadTau_genMatch.endswith("0j"):
          self.lepton_and_hadTau_genMatches_nonfakes.append(hadTau_genMatch)
        else:
          self.lepton_and_hadTau_genMatches_fakes.append(hadTau_genMatch)
    else:
      raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % applyFakeRateWeights)

    self.executable_addBackgrounds = executable_addBackgrounds
    self.executable_addFakes = executable_addBackgroundJetToTauFakes
    
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          for hadTau_charge_selection in self.hadTau_charge_selections:
            key_dir = getKey(sample_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection)  
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
              initDict(self.dirs, [ key_dir, dir_type ])
              self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                "_".join([ lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection ]), process_name)
    for dir_type in [ DKEY_DCRD, DKEY_PLOT ]:
      initDict(self.dirs, [ dir_type ])
      self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)          
    ##print "self.dirs = ", self.dirs

    self.nonfake_backgrounds = [ "TT", "TTW", "TTZ", "EWK", "Rares" ]
    
    self.cfgFile_analyze_original = os.path.join(self.workingDir, cfgFile_analyze_original)
    self.cfgFile_addBackgrounds_original = os.path.join(self.workingDir, "addBackgrounds_cfg.py")
    self.cfgFile_addBackgrounds_modified = {}
    self.histogramFile_addBackgrounds = {}
    self.histogramDir_addBackgrounds = {}    
    self.process_output_addBackgrounds = {}            
    self.histogramFile_hadd_stage1_5 = os.path.join(self.outputDir, DKEY_HIST, "histograms_harvested_stage1_5_%s.root" % self.channel)
    self.cfgFile_addFakes_original = os.path.join(self.workingDir, "addBackgroundJetToTauFakes_cfg.py")
    self.cfgFile_addFakes_modified = {}
    self.histogramFile_addFakes = {}
    self.prep_dcard_processesToCopy = [ "data_obs" ] + self.nonfake_backgrounds + [ "fakes_data", "fakes_mc" ]
    self.histogramDir_prep_dcard = "1l_2tau_OS_Tight"
    self.histogramDir_prep_dcard_SS = "1l_2tau_SS_Tight"
    self.make_plots_backgrounds = self.nonfake_backgrounds + [ "fakes_data" ]
    self.cfgFile_make_plots_mcClosure_original = os.path.join(self.workingDir, "makePlots_mcClosure_cfg.py")
    self.cfgFiles_make_plots_mcClosure_modified = []

    self.select_rle_output = select_rle_output
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")
      process_name = sample_info["process_name_specific"]

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
            continue

          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          for leptonChargeSelection in self.leptonChargeSelections:
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_selection, leptonChargeSelection, sample_info
                ):
                  continue
                
                key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                  if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, leptonChargeSelection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "addBackgroundLeptonFlips", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)                
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.") 

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for lepton_selection in self.lepton_selections:
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      hadTauVeto_selection = "Tight"
      hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])

      if lepton_selection == "forBDTtraining":
        electron_selection = "Loose"
        muon_selection = "Loose"
      elif lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for leptonChargeSelection in self.leptonChargeSelections:

          if 'mcClosure' in lepton_selection and leptonChargeSelection != 'SS':
            continue

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_selection, leptonChargeSelection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_selection, leptonChargeSelection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift)
              
              for jobId in inputFileList.keys():
                analyze_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                applyFakeRateWeights = self.applyFakeRateWeights \
                  if lepton_selection.find("Tight") == -1 \
                  else "disabled"

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'hadTauSelection'          : hadTauVeto_selection,
                  'leptonChargeSelection'    : leptonChargeSelection,
                  'applyFakeRateWeights'     : applyFakeRateWeights,
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'selectBDT'                : self.isBDTtraining,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'useNonNominal'            : self.use_nonnominal,
                  'fillGenEvtHistograms'     : True,
                  'gen_mHH'                  : self.gen_mHH,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, leptonChargeSelection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.isBDTtraining or self.do_sync:
              continue

            # add output files of hadd_stage1 to list of input files for hadd_stage1_5
            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, leptonChargeSelection)
            key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
            hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
            key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

          if self.isBDTtraining or self.do_sync:
            continue

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_fake, TTW_fake, TTWW_fake, ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          sample_categories = self.get_sample_categories()
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_flip, TTW_flip, TTWW_flip, ...
          # output process: flips_mc
          addBackgrounds_job_flips_tuple = ("flips_mc", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_flips = getKey(*addBackgrounds_job_flips_tuple)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_flip" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_flips_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_flips_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_flips_tuple),
            'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ],
            'processes_input' : processes_input,
            'process_output' : "flips_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips])

          # sum conversion background contributions for the total of all MC sample
          # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, leptonChargeSelection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          processes_input = []
          for sample_category in self.convs_backgrounds:
            processes_input.append("%s_Convs" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          # sum signal contributions from HH->4tau ("tttt"), HH->2W2tau ("wwtt"), and HH->4W ("wwww"),
          # separately for "nonfake" and "fake" contributions
          genMatch_categories = [ "nonfake", "fake" ]
          for genMatch_category in genMatch_categories:
            for signal_base, signal_input in self.signal_io.items():
              addBackgrounds_job_signal_tuple = (lepton_selection_and_frWeight, leptonChargeSelection, signal_base, genMatch_category)
              key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple)
              if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys():
                continue
              processes_input = signal_input
              process_output = signal_base
              if genMatch_category == "fake":
                processes_input = [ process_input + "_fake" for process_input in processes_input ]
                process_output += "_fake"
              self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = {
                'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple),
                'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple),
                'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple),
                'categories' : [ getHistogramDir(category, lepton_selection, lepton_frWeight, leptonChargeSelection) for category in self.categories ],
                'processes_input' : processes_input,
                'process_output' : process_output
              }
              self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal])
              key_hadd_stage2_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
              if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
              if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile'])

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, leptonChargeSelection)
          key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, leptonChargeSelection)
          hadd_stage2_job_tuple = (lepton_selection_and_frWeight, leptonChargeSelection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])          
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    if self.isBDTtraining or self.do_sync:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        if self.isBDTtraining:
          self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        elif self.do_sync:
          self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      if self.isBDTtraining:
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
      elif self.do_sync:
        self.addToMakefile_syncNtuple(lines_makefile)
        outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
        self.outputFile_sync['sync'] = outputFile_sync_path
        self.targets.append(outputFile_sync_path)
        self.addToMakefile_hadd_sync(lines_makefile)
      else:
        raise ValueError("Internal logic error")
      self.targets.extend(self.phoniesToAdd)
      self.addToMakefile_validate(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for category in self.categories:
      for leptonChargeSelection in self.leptonChargeSelections:
        key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), leptonChargeSelection)
        key_addFakes_dir = getKey("addBackgroundLeptonFakes")
        addFakes_job_tuple = (category, leptonChargeSelection)
        key_addFakes_job = getKey("data_fakes", *addFakes_job_tuple)        
        self.jobOptions_addFakes[key_addFakes_job] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % addFakes_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % addFakes_job_tuple),
          'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % addFakes_job_tuple),
          'category_signal' : getHistogramDir(category, "Tight", "disabled", leptonChargeSelection),
          'category_sideband' : getHistogramDir(category, "Fakeable", "enabled", leptonChargeSelection)
          }
        self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), leptonChargeSelection)
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    #--------------------------------------------------------------------------
    # add histograms in OS and SS regions,
    # so that "data_fakes" background can be subtracted from OS control region used to estimate charge flip background
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    key_hadd_stage1_6_dir = getKey("hadd", get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    if key_hadd_stage1_6_job not in self.inputFiles_hadd_stage1_6:
      self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job] = []
    for category in self.categories:
      key_addFakes_job = getKey("data_fakes", category, leptonChargeSelection)
      self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])
    self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
    self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job] = os.path.join(self.dirs[key_hadd_stage1_6_dir][DKEY_HIST],
                                                                        "hadd_stage1_6_Tight_OS.root")
    #--------------------------------------------------------------------------

    logging.info("Creating configuration files to run 'addBackgroundFlips'")
    for category in self.categories:
      key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_addFlips_dir = getKey("addBackgroundLeptonFlips")
      key_addFlips_job = getKey("data_flips", category)
      self.jobOptions_addFlips[key_addFlips_job] = {
        'inputFile' : self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFlips_dir][DKEY_CFGS], "addBackgroundLeptonFlips_%s_cfg.py" % category),
        'outputFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_HIST], "addBackgroundLeptonFlips_%s.root" % category),
        'logFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_LOGS], "addBackgroundLeptonFlips_%s.log" % category),
        'category_signal' : getHistogramDir(category, "Tight", "disabled", "SS" ),
        'category_sideband' : getHistogramDir(category, "Tight", "disabled", "OS" )
        }
      self.createCfg_addFlips(self.jobOptions_addFlips[key_addFlips_job])
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFlips[key_addFlips_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'...")
    for category in self.categories:
      for histogramToFit in self.histograms_to_fit:
        logging.info(" ...  for category %s, histogram %s" % (category, histogramToFit))
        prep_dcard_HH = set()
        for sample_name, sample_info in self.samples.items():
          if not sample_info["use_it"]:
            continue
          sample_category = sample_info["sample_category"]
          masses_to_exclude = ["3000", "2500", "2000", "1750", "1500", "1250"]
          if sample_category.startswith("signal"):
            sample_category = sample_info["sample_category_hh"]
            doAdd = False
            if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit:
              if ("SM" in histogramToFit or any(nonresPoint in histogramToFit for nonresPoint in NONRESONANT_KEYS)) and 'nonresonant' in sample_category:
                doAdd = True
              if "spin0" in histogramToFit and "spin0" in sample_category and histogramToFit[9:13] in sample_category:
                doAdd = True
              if "spin2" in histogramToFit and "spin2" in sample_category and histogramToFit[9:13] in sample_category:
                doAdd = True
              for mass in masses_to_exclude:
                if mass in sample_category: doAdd = False
            else:
              doAdd = True
            if doAdd:
              if "wwww" in sample_category:
                prep_dcard_HH.add(sample_category.replace("wwww", "zzzz"))
                prep_dcard_HH.add(sample_category.replace("wwww", "wwww"))
                prep_dcard_HH.add(sample_category.replace("wwww", "zzww"))
              elif "wwtt" in sample_category:
                prep_dcard_HH.add(sample_category.replace("wwtt", "ttzz"))
                prep_dcard_HH.add(sample_category.replace("wwtt", "ttww"))
              elif "tttt" in sample_category:                  
                prep_dcard_HH.add(sample_category)
              else:
                raise ValueError("Failed to identify relevant HH decay mode(s) for 'sample_category' = %s !!" % sample_category)
        prep_dcard_HH = list(prep_dcard_HH)
        prep_dcard_H = []
        prep_dcard_other_nonfake_backgrounds = []
        for process in self.nonfake_backgrounds:
          if process in [ "VH", "WH", "ZH", "TH", "tHq", "tHW", "TTH", "TTWH", "TTZH", "ggH", "qqH" ]:
            prep_dcard_H.append("%s_hww" % process)
            prep_dcard_H.append("%s_hzz" % process)
            prep_dcard_H.append("%s_htt" % process)
            prep_dcard_H.append("%s_hbb" % process)
          else:
            prep_dcard_other_nonfake_backgrounds.append(process)
        self.prep_dcard_processesToCopy = [ "data_obs" ] + prep_dcard_HH + prep_dcard_H + prep_dcard_other_nonfake_backgrounds + [ "Convs", "data_fakes", "data_flips", "fakes_mc", "flips_mc" ]
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
        key_prep_dcard_dir = getKey("prepareDatacards")
        prep_dcard_job_tuple = (self.channel, category, "SS", histogramToFit)
        key_prep_dcard_job = getKey(category, "SS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : getHistogramDir(category, "Tight", "disabled", "SS"),
          'histogramToFit' : histogramToFit,
          'label' : "2lSS"
          }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

        if "OS" in self.leptonChargeSelections:
          key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
          prep_dcard_job_tuple = (self.channel, category, "OS", histogramToFit)
          key_prep_dcard_job = getKey(category, "OS", histogramToFit)
          self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
            'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple),
            'histogramDir' : getHistogramDir(category, "Tight", "disabled", "OS"),
            'histogramToFit' : histogramToFit,
            'label' : "2lOS",
            }
          self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

        # add shape templates for the following systematic uncertainties:
        #  - 'CMS_ttHl_Clos_norm_e'
        #  - 'CMS_ttHl_Clos_shape_e'
        #  - 'CMS_ttHl_Clos_norm_m'
        #  - 'CMS_ttHl_Clos_shape_m'
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, category, "SS", histogramToFit)
        key_add_syst_fakerate_job = getKey(category, "SS", histogramToFit)
        key_prep_dcard_job = getKey(category, "SS", histogramToFit)
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
          'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
          'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
          'category' : category,
          'histogramToFit' : histogramToFit,
          'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = "%s/sel/evt/fakes_mc" % getHistogramDir(category, "Tight", "disabled", "SS")
        for lepton_type in [ 'e', 'm' ]:
          lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
          if lepton_mcClosure not in self.lepton_selections:
            continue
          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
          key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, "SS")
          histogramDir_mcClosure = "%s/sel/evt/fakes_mc" % self.mcClosure_dir['%s_%s' % (lepton_mcClosure, "SS")]
          if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit:
            histogramDir_nominal = histogramDir_nominal.replace("/sel/evt", "/sel/datacard")
            histogramDir_mcClosure = histogramDir_mcClosure.replace("/sel/evt", "/sel/datacard")
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
            'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
            'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'histogramName_nominal_%s' % lepton_type : "%s/%s" % (histogramDir_nominal, histogramToFit),
            'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
            'histogramName_mcClosure_%s' % lepton_type : "%s/%s" % (histogramDir_mcClosure, histogramToFit)
          })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])
            
    logging.info("Creating configuration files to run 'makePlots'")
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
    key_makePlots_dir = getKey("makePlots")        
    key_makePlots_job = getKey("SS")
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable' : self.executable_make_plots,
      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
      'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
      'histogramDir' : self.histogramDir_prep_dcard,
      'label' : "2lSS",
      'make_plots_backgrounds' : self.make_plots_backgrounds,
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "OS" in self.leptonChargeSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_OS_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_OS.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard_OS,
        'label' : "2lOS",
        'make_plots_backgrounds' : self.make_plots_backgrounds_OS,
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_selections: #TODO
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      key_makePlots_job = getKey("Fakeable_mcClosure", "SS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel),
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFlips)
      self.sbatchFile_addFlips = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFlips_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFlips, self.sbatchFile_addFlips, self.jobOptions_addFlips)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data_withFlips(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
Exemplo n.º 11
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")
      process_name = sample_info["process_name_specific"]

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
            continue

          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          for chargeSumSelection in self.chargeSumSelections:
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                      central_or_shift_or_dummy, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info
                    ):
                  continue

                key_dir = getKey(process_name_or_dummy, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel,
                      "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.get_dir_type(dir_type), dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")   

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e|t)_wFakeRateWeights')
    for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
      lepton_selection = lepton_and_hadTau_selection
      hadTau_selection = lepton_and_hadTau_selection
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      if self.applyFakeRateWeights == "3tau":
        lepton_selection = "Tight"
      hadTau_selection = "|".join([ hadTau_selection, self.hadTau_selection_part2 ])

      if lepton_and_hadTau_selection == "forBDTtraining":
        lepton_selection = "Loose"
        electron_selection = lepton_selection
        muon_selection = lepton_selection
        hadTau_selection = "Tight|%s" % self.hadTau_selection_relaxed
      elif lepton_and_hadTau_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
        hadTau_selection = "Tight"
        hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2])
      elif lepton_and_hadTau_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"
        hadTau_selection = "Tight"
        hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2])
      elif lepton_and_hadTau_selection == "Fakeable_mcClosure_t":
        electron_selection = "Tight"
        muon_selection = "Tight"
        hadTau_selection = "Fakeable"
        hadTau_selection = "|".join([hadTau_selection, self.hadTau_selection_part2])

      for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
        if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
          continue
        if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)

        for chargeSumSelection in self.chargeSumSelections:

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_and_hadTau_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift)

              for jobId in inputFileList.keys():

                analyze_job_tuple = (process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                applyFakeRateWeights = self.applyFakeRateWeights \
                  if not (lepton_selection == "Tight" and hadTau_selection.find("Tight") != -1) \
                  else "disabled"

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'hadTauSelection'          : hadTau_selection,
                  'apply_hadTauGenMatching'  : self.apply_hadTauGenMatching,
                  'chargeSumSelection'       : chargeSumSelection,
                  'applyFakeRateWeights'     : applyFakeRateWeights,
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'useNonNominal'            : self.use_nonnominal,
                  'fillGenEvtHistograms'     : True,
                  'selectBDT'                : self.isBDTtraining,
                  'gen_mHH'                  : self.gen_mHH,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_and_hadTau_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
                hadd_stage1_job_tuple = (process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.isBDTtraining:
              continue

            # add output files of hadd_stage1 to list of input files for hadd_stage1_5
            key_hadd_stage1_job = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
            key_hadd_stage1_5_dir = getKey("hadd", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
            hadd_stage1_5_job_tuple = (lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
            key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

          if self.isBDTtraining:
            continue

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_fake, TTW_fake, TTWW_fake, ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          sample_categories = self.get_sample_categories()
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(lepton_selection, hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum conversion background contributions for the total of all MC sample
          # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          processes_input = []
          for sample_category in self.convs_backgrounds:
            processes_input.append("%s_Convs" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(lepton_selection, hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          if self.isBDTtraining:
            continue

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          key_hadd_stage2_dir = getKey("hadd", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          hadd_stage2_job_tuple = (lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_and_hadTau_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])          
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    if self.isBDTtraining:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_analyze(lines_makefile)
      self.addToMakefile_hadd_stage1(lines_makefile)
      self.targets.extend(self.phoniesToAdd)
      self.addToMakefile_validate(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for chargeSumSelection in self.chargeSumSelections:
      key_hadd_stage1_5_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled"), chargeSumSelection)
      key_addFakes_dir = getKey("addBackgroundLeptonFakes")
      key_addFakes_job = getKey("data_fakes", chargeSumSelection)      
      category_sideband = None
      if self.applyFakeRateWeights == "4L":
        category_sideband = "hh_1l_3tau_%s_Fakeable_wFakeRateWeights" % chargeSumSelection
      elif self.applyFakeRateWeights == "3tau":
        category_sideband = "hh_1l_3tau_%s_Fakeable_wFakeRateWeights" % chargeSumSelection
      else:
        raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % self.applyFakeRateWeights)
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % chargeSumSelection),
        'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % chargeSumSelection),
        'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % chargeSumSelection),
        'category_signal' : "hh_1l_3tau_%s_Tight" % chargeSumSelection,
        'category_sideband' : category_sideband
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), chargeSumSelection)
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'...")
    for histogramToFit in self.histograms_to_fit:
      logging.info(" ...  for histogram %s" % histogramToFit)
      prep_dcard_HH = set()
      for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
          continue
        sample_category = sample_info["sample_category"]
        if sample_category.startswith("signal"):
          sample_category = sample_info["sample_category_hh"]
          doAdd = False
          if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit:
            if ("SM" in histogramToFit or any(nonresPoint in histogramToFit for nonresPoint in NONRESONANT_KEYS)) and 'nonresonant' in sample_category:
              doAdd = True
            if ("spin0" in histogramToFit and "spin0" in sample_category) or ("spin2" in histogramToFit and "spin2" in sample_category):
              startpos = None
              for pattern in [ "MVAOutput", "BDTOutput" ]:
                if pattern in histogramToFit:
                  startpos = histogramToFit.find(pattern) + len(pattern) + 1 # CV: increment startpos by 1 to account for trailing "_"
              if not startpos:
                raise ValueError("Failed to parse histogram name = '%s' !!" % histogramToFit) 
              endpos = histogramToFit.find("_", startpos)
              masspoint = histogramToFit[startpos:endpos]
              if ("_%s_" % masspoint) in sample_category:
                doAdd = True
          else:
            doAdd = True
          if doAdd:
            if "_wwww" in sample_category:
              prep_dcard_HH.add(sample_category.replace("_wwww", "_zzzz"))
              prep_dcard_HH.add(sample_category.replace("_wwww", "_wwww"))
              prep_dcard_HH.add(sample_category.replace("_wwww", "_zzww"))
              if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit):
                prep_dcard_HH.add(sample_category.replace("_wwww", ""))
            elif "_wwtt" in sample_category:
              prep_dcard_HH.add(sample_category.replace("_wwtt", "_ttzz"))
              prep_dcard_HH.add(sample_category.replace("_wwtt", "_ttww"))
              if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit):
                prep_dcard_HH.add(sample_category.replace("_wwtt", ""))
            elif "_tttt" in sample_category:
              prep_dcard_HH.add(sample_category)
              if not ("BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit):
                prep_dcard_HH.add(sample_category.replace("_tttt", ""))
            else:
              raise ValueError("Failed to identify relevant HH decay mode(s) for 'sample_category' = %s !!" % sample_category)
      prep_dcard_HH = list(prep_dcard_HH)
      prep_dcard_H = []
      prep_dcard_other_nonfake_backgrounds = []
      for process in self.nonfake_backgrounds:
        if process in [ "VH", "WH", "ZH", "TH", "tHq", "tHW", "TTH", "TTWH", "TTZH", "ggH", "qqH" ]:
          prep_dcard_H.append("%s_hww" % process)
          prep_dcard_H.append("%s_hzz" % process)
          prep_dcard_H.append("%s_htt" % process)
          prep_dcard_H.append("%s_hbb" % process)
        else:
          prep_dcard_other_nonfake_backgrounds.append(process)
      self.prep_dcard_processesToCopy = [ "data_obs" ] + prep_dcard_HH + prep_dcard_H + prep_dcard_other_nonfake_backgrounds + [ "Convs", "data_fakes", "fakes_mc" ]
      key_prep_dcard_dir = getKey("prepareDatacards")
      if "OS" in self.chargeSumSelections:
        key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
        prep_dcard_job_tuple = (self.channel, "OS", histogramToFit)
        key_prep_dcard_job = getKey("OS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard,
          'histogramToFit' : histogramToFit,
          'label' : None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
      if "SS" in self.chargeSumSelections:
        key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS")
        prep_dcard_job_tuple = (self.channel, "SS", histogramToFit)
        key_prep_dcard_job = getKey("SS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard_SS,
          'histogramToFit' : histogramToFit,
          'label' : 'SS'
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      #  - 'CMS_ttHl_Clos_norm_t'
      #  - 'CMS_ttHl_Clos_shape_t'
      for chargeSumSelection in self.chargeSumSelections:
        key_prep_dcard_job = getKey(chargeSumSelection, histogramToFit)
        key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), chargeSumSelection)
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, chargeSumSelection, histogramToFit)
        key_add_syst_fakerate_job = getKey(chargeSumSelection, histogramToFit)
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
          'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
          'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
          'category' : self.channel,
          'histogramToFit' : histogramToFit,
          'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = None
        if chargeSumSelection == "OS":
          histogramDir_nominal = "%s/sel/evt/fakes_mc" % self.histogramDir_prep_dcard
        elif chargeSumSelection == "SS":
          histogramDir_nominal = "%s/sel/evt/fakes_mc" % self.histogramDir_prep_dcard_SS
        else:
          raise ValueError("Invalid parameter 'chargeSumSelection' = %s !!" % chargeSumSelection)
        for lepton_and_hadTau_type in [ 'e', 'm', 't' ]:
          lepton_and_hadTau_mcClosure = "Fakeable_mcClosure_%s" % lepton_and_hadTau_type
          if lepton_and_hadTau_mcClosure not in self.lepton_and_hadTau_selections:
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_mcClosure, "enabled")
          key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_and_hadTau_selection_and_frWeight, chargeSumSelection)
          histogramDir_mcClosure = "%s/sel/evt/fakes_mc" % self.mcClosure_dir['%s_%s' % (lepton_and_hadTau_mcClosure, chargeSumSelection)]
          if "BDTOutput" in histogramToFit or "MVAOutput" in histogramToFit:
            histogramDir_nominal = histogramDir_nominal.replace("/sel/evt", "/sel/datacard")
            histogramDir_mcClosure = histogramDir_mcClosure.replace("/sel/evt", "/sel/datacard")
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
            'add_Clos_%s' % lepton_and_hadTau_type : ("Fakeable_mcClosure_%s" % lepton_and_hadTau_type) in self.lepton_and_hadTau_selections,
            'inputFile_nominal_%s' % lepton_and_hadTau_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'histogramName_nominal_%s' % lepton_and_hadTau_type : "%s/%s" % (histogramDir_nominal, histogramToFit),
            'inputFile_mcClosure_%s' % lepton_and_hadTau_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
            'histogramName_mcClosure_%s' % lepton_and_hadTau_type : "%s/%s" % (histogramDir_mcClosure, histogramToFit)
          })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_dir = getKey("makePlots")
    if "OS" in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : '1l+3#tau_{h}',
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS")
      key_makePlots_job = getKey("SS")      
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard_SS,
        'label' : "1l+3#tau_{h} SS",
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections: #TODO
      key_hadd_stage2_job = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("Fakeable_mcClosure", "OS")      
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")
      process_name = sample_info["process_name_specific"]

      logging.info("Building dictionaries for sample %s..." % process_name)
      for chargeSumSelection in self.chargeSumSelections:
        for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
          for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
            if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
              continue
            if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
              continue

            lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
            central_or_shift_extensions = ["", "hadd", "copyHistograms", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if process_name_or_dummy in [ "hadd" ] and central_or_shift_or_dummy != "":
                  continue
                evtcategories_extended = [""]
                evtcategories_extended.extend(self.evtCategories)
                if central_or_shift_or_dummy in [ "hadd", "copyHistograms", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info
                ):
                  continue

                key_dir = getKey(process_name_or_dummy, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_RLES, DKEY_SYNC ]:
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                                                                "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                                                                "_".join([ lepton_and_hadTau_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                    
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for chargeSumSelection in self.chargeSumSelections:
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        lepton_selection = lepton_and_hadTau_selection
        ##hadTau_selection = None
        ##if lepton_and_hadTau_selection == "Tight":
        ##  hadTau_selection = "%s|%s" % (lepton_and_hadTau_selection, self.hadTau_mva_wp)
        ##else:
        ##  hadTau_selection = lepton_and_hadTau_selection
        hadTau_selection = "%s|%s" % (lepton_and_hadTau_selection, self.hadTau_mva_wp)
        electron_selection = lepton_selection
        muon_selection = lepton_selection
        
        if lepton_and_hadTau_selection == "Fakeable_mcClosure_e":
          electron_selection = "Fakeable"
          muon_selection = "Tight"
          hadTau_selection = "Tight|%s" % self.hadTau_mva_wp
        elif lepton_and_hadTau_selection == "Fakeable_mcClosure_m":
          electron_selection = "Tight"
          muon_selection = "Fakeable"
          hadTau_selection = "Tight|%s" % self.hadTau_mva_wp
        elif lepton_and_hadTau_selection == "Fakeable_mcClosure_t":
          electron_selection = "Tight"
          muon_selection = "Tight"
          hadTau_selection = "Fakeable"
          
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_and_hadTau_selection, chargeSumSelection, sample_category, sample_name, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_and_hadTau_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift)

              for jobId in inputFileList.keys():
                
                analyze_job_tuple = (process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                applyFakeRateWeights = self.applyFakeRateWeights  \
                  if self.isBDTtraining or not lepton_and_hadTau_selection == "Tight" \
                  else "disabled"

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'hadTauSelection'          : hadTau_selection,
                  'apply_hadTauGenMatching'  : self.apply_hadTauGenMatching,
                  'chargeSumSelection'       : chargeSumSelection,
                  'applyFakeRateWeights'     : applyFakeRateWeights,
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'selectBDT'                : self.isBDTtraining,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'useNonNominal'            : self.use_nonnominal,
                  'fillGenEvtHistograms'     : True,
                  'useObjectMultiplicity'    : True,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_and_hadTau_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, "hadd")
                hadd_stage1_job_tuple = (process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST], 
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

                if self.isBDTtraining:
                  self.targets.append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

            if self.isBDTtraining:
              continue
            
            #----------------------------------------------------------------------------
            # split hadd_stage1 files into separate files, one for each event category
            for category in self.evtCategories:
              key_hadd_stage1_job = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_copyHistograms_dir = getKey(process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight, "copyHistograms")
              copyHistograms_job_tuple = (category, process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_copyHistograms_job = getKey(*copyHistograms_job_tuple)
              cfgFile_modified = os.path.join(self.dirs[key_copyHistograms_dir][DKEY_CFGS], "copyHistograms_%s_%s_%s_%s_cfg.py" % copyHistograms_job_tuple)
              outputFile = os.path.join(self.dirs[key_copyHistograms_dir][DKEY_HIST], "copyHistograms_%s_%s_%s_%s.root" % copyHistograms_job_tuple)
              self.jobOptions_copyHistograms[key_copyHistograms_job] = {
                'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job],
                'cfgFile_modified' : cfgFile_modified,
                'outputFile' : outputFile,
                'logFile' : os.path.join(self.dirs[key_copyHistograms_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
                'categories' : [ category ],
              }
              self.createCfg_copyHistograms(self.jobOptions_copyHistograms[key_copyHistograms_job])
            #----------------------------------------------------------------------------

            # add output files of copyHistograms jobs to list of input files for hadd_stage1_5
            for category in self.evtCategories:
              key_copyHistograms_job = getKey(category, process_name, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_hadd_stage1_5_dir = getKey("hadd", chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              hadd_stage1_5_job_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
              if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_copyHistograms[key_copyHistograms_job]['outputFile'])
              self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                          "hadd_stage1_5_%s_%s_%s.root" % hadd_stage1_5_job_tuple)

          if self.isBDTtraining:
            continue

          for category in self.evtCategories:
            # sum fake background contributions for the total of all MC sample
            # input processes: TT_fake, TTW_fake, TTWW_fake, ...
            # output process: fakes_mc
            key_hadd_stage1_5_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            key_addBackgrounds_dir = getKey("addBackgrounds")
            addBackgrounds_job_fakes_tuple = ("fakes_mc", category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
            sample_categories = []
            sample_categories.extend(self.nonfake_backgrounds)
            processes_input = []
            for sample_category in sample_categories:
              processes_input.append("%s_fake" % sample_category)
            self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
              'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
              'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
              'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
              'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
              'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
              'processes_input' : processes_input,
              'process_output' : "fakes_mc"
              }
            self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])
            
            # sum conversion background contributions for the total of all MC sample
            # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
            # output process: Convs
            addBackgrounds_job_Convs_tuple = ("Convs", category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection)
            key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
            sample_categories = []
            sample_categories.extend(self.nonfake_backgrounds)
            processes_input = []
            for sample_category in sample_categories:
              processes_input.append("%s_Convs" % sample_category)
            self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
              'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
              'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
              'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
              'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
              'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
              'processes_input' : processes_input,
              'process_output' : "Convs"
              }
            self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

            # sum signal contributions from gluon fusion and VBF HH production,
            # separately for "nonfake" and "fake" contributions
            genMatch_categories = [ "nonfake", "fake" ]
            for genMatch_category in genMatch_categories:
              for signal_base, signal_input in self.signal_io.items():
                addBackgrounds_job_signal_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection, signal_base, genMatch_category)
                key_addBackgrounds_job_signal = getKey(*addBackgrounds_job_signal_tuple)
                if key_addBackgrounds_job_signal in self.jobOptions_addBackgrounds_sum.keys():
                  continue
                processes_input = signal_input
                process_output = signal_base
                if genMatch_category == "fake":
                  processes_input = [ process_input + "_fake" for process_input in processes_input ]
                  process_output += "_fake"
                  self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal] = {
                    'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
                    'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_signal_tuple),
                    'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % addBackgrounds_job_signal_tuple),
                    'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % addBackgrounds_job_signal_tuple),
                    'categories' : [ getHistogramDir(category, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, chargeSumSelection) ],
                    'processes_input' : processes_input,
                    'process_output' : process_output
                    }
                  self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal])
                  key_hadd_stage2_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
                  if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
                  if lepton_selection == "Tight":
                    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_signal]['outputFile'])

              # initialize input and output file names for hadd_stage2
              key_hadd_stage1_5_job = getKey(category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_hadd_stage2_dir = getKey("hadd", chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              hadd_stage2_job_tuple = (category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
              key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
              if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
              if lepton_selection == "Tight":
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])
              self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
              self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                              "hadd_stage2_%s_%s_%s.root" % hadd_stage2_job_tuple)

    if self.isBDTtraining:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_analyze(lines_makefile)
      self.addToMakefile_hadd_stage1(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for chargeSumSelection in self.chargeSumSelections:
      for category in self.evtCategories:
        key_hadd_stage1_5_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled"))
        key_addFakes_dir = getKey("addBackgroundLeptonFakes")
        addFakes_job_tuple = (category, chargeSumSelection)
        key_addFakes_job = getKey("data_fakes", *addFakes_job_tuple)
        self.jobOptions_addFakes[key_addFakes_job] = {
          'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % addFakes_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % addFakes_job_tuple),
          'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % addFakes_job_tuple),
          'category_signal' : getHistogramDir(category, "Tight", "disabled", chargeSumSelection),
          'category_sideband' : getHistogramDir(category, "Fakeable", "enabled", chargeSumSelection)
        }
        self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
        key_hadd_stage2_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for chargeSumSelection in self.chargeSumSelections:
      for category in self.evtCategories:
        for histogramToFit in self.histograms_to_fit:
          key_hadd_stage2_job = getKey(category, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
          key_prep_dcard_dir = getKey("prepareDatacards")
          prep_dcard_job_tuple = (self.channel, category, chargeSumSelection, histogramToFit)
          key_prep_dcard_job = getKey(category, chargeSumSelection, histogramToFit)
          self.jobOptions_prep_dcard[key_prep_dcard_job] = {
            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
            'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s_%s.root" % prep_dcard_job_tuple),
            'histogramDir' : getHistogramDir(category, "Tight", "disabled", chargeSumSelection),
            'histogramToFit' : histogramToFit
          }
          self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
          # add shape templates for the following systematic uncertainties:
          #  - 'CMS_ttHl_Clos_norm_e'
          #  - 'CMS_ttHl_Clos_shape_e'
          #  - 'CMS_ttHl_Clos_norm_m'
          #  - 'CMS_ttHl_Clos_shape_m'
          key_add_syst_fakerate_dir = getKey("addSystFakeRates")
          add_syst_fakerate_job_tuple = (self.channel, category, chargeSumSelection, histogramToFit)
          key_add_syst_fakerate_job = getKey(category, chargeSumSelection, histogramToFit)
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
            'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
            'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
            'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
            'category' : category,
            'histogramToFit' : histogramToFit,
            'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
          }
          histogramDir_nominal = getHistogramDir(category, "Tight", "disabled", chargeSumSelection)
          for lepton_and_hadTau_type in [ 'e', 'm', 't' ]:
            lepton_and_hadTau_mcClosure = "Fakeable_mcClosure_%s" % lepton_and_hadTau_type
            if lepton_and_hadTau_mcClosure not in self.lepton_and_hadTau_selections:
              continue
            lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_mcClosure, "enabled")
            key_addBackgrounds_job_fakes = getKey("fakes_mc", category, chargeSumSelection, lepton_and_hadTau_selection_and_frWeight)
            histogramDir_mcClosure = self.mcClosure_dir[lepton_and_hadTau_mcClosure+'_%s' %chargeSumSelection]
            histogramDir_mcClosure = histogramDir_mcClosure.replace(self.evtCategory_inclusive, category)
            self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
              'add_Clos_%s' % lepton_and_hadTau_type : ("Fakeable_mcClosure_%s" % lepton_and_hadTau_type) in self.lepton_and_hadTau_selections,
              'inputFile_nominal_%s' % lepton_and_hadTau_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
              'histogramName_nominal_%s' % lepton_and_hadTau_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
              'inputFile_mcClosure_%s' % lepton_and_hadTau_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
              'histogramName_mcClosure_%s' % lepton_and_hadTau_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
            })
          self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    for chargeSumSelection in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(self.evtCategory_inclusive, chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
      key_makePlots_dir = getKey("makePlots")
      key_makePlots_job = getKey(chargeSumSelection)
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_cfg.py" % (self.channel, chargeSumSelection)),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s.png" % (self.channel, chargeSumSelection)),
        'histogramDir' : getHistogramDir(self.evtCategory_inclusive, "Tight", "disabled", chargeSumSelection),
        'label' : '1l1tau',
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
      if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections: #TODO
        key_makePlots_job = getKey(chargeSumSelection)
        key_hadd_stage2 = getKey(chargeSumSelection, get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"))
        self.jobOptions_make_plots[key_makePlots_job] = {
          'executable' : self.executable_make_plots_mcClosure,
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
          'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_%s_cfg.py" % (self.channel, chargeSumSelection)),
          'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s_%s.png" % (self.channel, chargeSumSelection))
        }
        self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])
        
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_copyHistograms)
      self.sbatchFile_copyHistograms = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_copyHistograms_%s.py" % self.channel)
      self.createScript_sbatch_copyHistograms(self.executable_copyHistograms, self.sbatchFile_copyHistograms, self.jobOptions_copyHistograms)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch_addBackgrounds(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch_addBackgrounds(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch_addFakes(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_copyHistograms(lines_makefile, make_target = "phony_copyHistograms", make_dependency = "phony_hadd_stage1")
    self.addToMakefile_backgrounds_from_data(lines_makefile, make_dependency = "phony_copyHistograms")
    #----------------------------------------------------------------------------                             
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
Exemplo n.º 13
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """
  
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_selection in self.lepton_selections:
        #lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
        key_dir = getKey(process_name, lepton_selection)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
          initDict(self.dirs, [ key_dir, dir_type ])
          if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
            self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
              "_".join([ lepton_selection ]), process_name)
          else:
            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
              "_".join([ lepton_selection ]), process_name)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    ##print "self.dirs = ", self.dirs    

    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
      else:
        create_if_not_exists(self.dirs[key])

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)

    for lepton_selection in self.lepton_selections:
        for sample_name, sample_info in self.samples.items():
          if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
            continue
          process_name = sample_info["process_name_specific"]

          # Edit Siddh ~~~~~~~~~~~~~
          run_process = False 
          for sprocess_run in sample_process_run_s:    
            if sprocess_run == process_name:  
              run_process = True  
          #print "Run process: ", sprocess_run     
              
          if run_process == False:   
            continue       
          #print "run_process:",process_name    
          # ~~~~~~~~~~~~~~~~~~~~~~~~~~

          #if not ("DY" in process_name or "Muon" in process_name): continue
          logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))  
          sample_category = sample_info["sample_category"]
          is_mc = (sample_info["type"] == "mc")
          is_signal = (sample_category == "signal")
          inputFileList = inputFileLists[sample_name]
          for central_or_shift in self.central_or_shifts:
            for jobId in inputFileList.keys():
              #if central_or_shift != "central" and not (lepton_and_hadTau_selection.startswith("Tight") and lepton_charge_selection == "SS"):
              #  continue
              if central_or_shift != "central" and not is_mc:
                continue                

              # build config files for executing analysis code
              key_dir = getKey(process_name, lepton_selection)
              key_analyze_job = getKey(process_name, lepton_selection, central_or_shift, jobId)
              ntupleFiles = inputFileList[jobId]
              if len(ntupleFiles) == 0:
                print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_job, ntupleFiles)
                continue
              self.jobOptions_analyze[key_analyze_job] = {
                'ntupleFiles' : ntupleFiles,
                'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                  (self.channel, process_name, lepton_selection, central_or_shift, jobId)),
                'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                  (process_name, lepton_selection, central_or_shift, jobId)),
                'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                  (self.channel, process_name, lepton_selection, central_or_shift, jobId)),
                'rleOutputFile' : os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % \
                  (self.channel, process_name, lepton_selection, central_or_shift, jobId)) if self.select_rle_output else "",
                'sample_category' : sample_category,
                'triggers' : sample_info["triggers"],
                'lepton_selection' : lepton_selection,
                #'apply_leptonGenMatching' : self.apply_leptonGenMatching,
                #'apply_hadTauGenMatching' : self.apply_hadTauGenMatching,
                #'applyFakeRateWeights' : self.applyFakeRateWeights if not (lepton_selection == "Tight" and hadTau_selection.find("Tight") != -1) else "disabled",
                'applyFakeRateWeights' : "disabled",
                'use_HIP_mitigation_mediumMuonId' : True,
                'is_mc' : is_mc,
                'central_or_shift' : central_or_shift,
                'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc
              }

              #applyFakeRateWeights = self.applyFakeRateWeights
              #if lepton_and_hadTau_frWeight == "disabled":
              #  applyFakeRateWeights = "disabled"
              self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

              # initialize input and output file names for hadd_stage1
              key_hadd_stage1 = getKey(process_name, lepton_selection)
              if not key_hadd_stage1 in self.inputFiles_hadd_stage1.keys():
                self.inputFiles_hadd_stage1[key_hadd_stage1] = []
              self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
              self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                  (self.channel, process_name, lepton_selection))
          print key_hadd_stage1, self.channel, process_name, lepton_selection, self.outputFile_hadd_stage1[key_hadd_stage1]

          #key_hadd_stage1 = getKey(process_name, lepton_selection)
          #key_hadd_stage1_5 = getKey(lepton_selection)
          #print self.inputFiles_hadd_stage1_5 
          #self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.outputFile_hadd_stage1[key_hadd_stage1])

          key_addBackgrounds_job = getKey(lepton_selection)
          sample_categories = []
          sample_categories.extend([ "signal" ])
          processes_input = []


          # initialize input and output file names for hadd_stage2
          key_hadd_stage2 = getKey(lepton_selection)
          if not key_hadd_stage2 in self.inputFiles_hadd_stage2.keys():
            self.inputFiles_hadd_stage2[key_hadd_stage2] = []
          #if lepton_selection == "Tight":
          #  self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
          #key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight)
          self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])
        self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s.root" % \
            (self.channel, lepton_selection))

    key_hadd_stage2 = getKey(lepton_selection)
    #self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.jobOptions_addFlips[key_addFlips_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    processesToCopy = []
    for process in self.prep_dcard_processesToCopy:
      processesToCopy.append(process)
    self.prep_dcard_processesToCopy = processesToCopy
    processesToCopy = []
    for process in self.prep_dcard_signals:
      processesToCopy.append(process)
    self.prep_dcard_signals = processesToCopy
    for histogramToFit in self.histograms_to_fit:
      key_prep_dcard_job = getKey(histogramToFit)
      key_hadd_stage2 = getKey(lepton_selection)
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
        'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % (self.channel, histogramToFit)),
        'datacardFile' : os.path.join(self.dirs[DKEY_DCRD], "prepareDatacards_%s_%s.root" % (self.channel, histogramToFit)),
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }                            
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    #self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    #self.addToMakefile_add_syst_dcard(lines_makefile)
    #self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)
  
    logging.info("Done")
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      process_name = sample_info["process_name_specific"]
      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")

      logging.info("Building dictionaries for sample %s..." % process_name)
      central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
      central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
      for central_or_shift_or_dummy in central_or_shifts_extended:
        process_name_extended = [ process_name, "hadd" ]
        for process_name_or_dummy in process_name_extended:
          if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
            if not is_mc:
              continue
            if not self.accept_central_or_shift(central_or_shift_or_dummy, sample_info):
              continue
          
          key_dir = getKey(process_name_or_dummy, central_or_shift_or_dummy)
          for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
            initDict(self.dirs, [ key_dir, dir_type ])
            if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
              self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, process_name_or_dummy, central_or_shift_or_dummy)
            else:
              self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "prepareDatacards" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HIST, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_COMBINE_OUTPUT ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_HADD_RT, DKEY_PLOT, DKEY_COMBINE_OUTPUT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    self.inputFileIds = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      process_name = sample_info["process_name_specific"]

      logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
      inputFileList = inputFileLists[sample_name]

      is_mc = (sample_info["type"] == "mc")
      sample_category = sample_info["sample_category"]

      for central_or_shift in self.central_or_shifts:
        if central_or_shift != "central" and not is_mc:
          continue

        if not self.accept_central_or_shift(central_or_shift, sample_info):
          continue

        key_analyze_dir = getKey(process_name, central_or_shift)

        for jobId in inputFileList.keys():

          analyze_job_tuple = (process_name, central_or_shift, jobId)
          key_analyze_job = getKey(*analyze_job_tuple)
          ntupleFiles = inputFileList[jobId]
          if len(ntupleFiles) == 0:
            logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
            continue

          rleOutputFile = os.path.join(
            self.dirs[key_analyze_dir][DKEY_RLES],
            "rle_{channel}_{process_name}_{central_or_shift}_{jobId}_%s_%s.txt".format(
              channel          = self.channel,
              process_name     = process_name,
              central_or_shift = central_or_shift,
              jobId            = jobId,
            )) if self.select_rle_output else ""

          cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%i_cfg.py" % analyze_job_tuple)
          logFile_path          = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%i.log" % analyze_job_tuple)
          histogramFile_path    = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%i.root" % analyze_job_tuple)
          self.jobOptions_analyze[key_analyze_job] = {
            'ntupleFiles'              : ntupleFiles,
            'cfgFile_modified'         : cfgFile_modified_path,
            'histogramFile'            : histogramFile_path,
            'selEventsFileName_output' : rleOutputFile,
            'logFile'                  : logFile_path,
            'absEtaBins_e'             : self.absEtaBins_e,
            'ptBins_e'                 : self.ptBins_e,
            'absEtaBins_mu'            : self.absEtaBins_mu,
            'ptBins_mu'                : self.ptBins_mu,
            'central_or_shift'         : central_or_shift,
            'fillGenEvtHistograms'     : self.fillGenEvtHistograms,
            'triggers_mu_cfg'          : "leptonFR_triggers['{}']['{}']".format(self.era, 'mu'),
            'triggers_e_cfg'           : "leptonFR_triggers['{}']['{}']".format(self.era, 'e'),
            'lep_mva_cut_e'            : float(self.lep_mva_cut_e),
            'lep_mva_cut_mu'           : float(self.lep_mva_cut_mu),
          }
          self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info)

          # initialize input and output file names for hadd_stage1
          key_hadd_stage1_dir = getKey(process_name)
          key_hadd_stage1_job = getKey(process_name)
          if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
            self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
          self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
          self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                          "hadd_stage1_%s.root" % process_name)

    # initialize input and output file names for hadd_stage1_5
    key_hadd_stage1_5_dir = getKey("hadd")
    key_hadd_stage1_5_job = getKey('')
    if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
      self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
    for key_hadd_stage1_job in self.outputFile_hadd_stage1.keys():
      self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
    self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST], "hadd_stage1_5.root" )

    # sum fake contributions for the total of all MC samples
    # input processes: TTj,... ## HERE !!
    # output process: fakes_mc
    key_hadd_stage1_5_job = getKey('')
    key_addBackgrounds_dir = getKey("addBackgrounds")
    key_addBackgrounds_job_sum = getKey("fakes_mc")
    sample_categories = []
    sample_categories.extend(self.nonfake_backgrounds)
    sample_categories.extend(self.ttHProcs)
    processes_input = []
    for sample_category in sample_categories:
      processes_input.append("%sj" % sample_category)
      self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_sum] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_cfg.py" % "fakes_mc"),
        'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s.root" % "fakes_mc"),
        'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s.log" % "fakes_mc"),
        'categories' : [
          "LeptonFakeRate/numerator/electrons_tight",
          "LeptonFakeRate/denominator/electrons_fakeable",
          "LeptonFakeRate/numerator/muons_tight",
          "LeptonFakeRate/denominator/muons_fakeable"
        ],
        'processes_input' : processes_input,
        'process_output' : "fakes_mc",
        'histogramsToCopy' : list(self.histograms_to_fit.keys()),
        'sysShifts' : []
      }
      self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_sum])

    # create configuration files to run 'addBackgrounds_LeptonFakeRate'
    key_addBackgrounds_job_leptonFR = getKey('')
    self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job_leptonFR] = {
      'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], os.path.basename(self.cfgFile_addBackgrounds_LeptonFakeRate)),
      'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackground_LeptonFakeRate.root"),
      'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(self.cfgFile_addBackgrounds_LeptonFakeRate.replace("_cfg.py", ".log")) ),
    }
    self.createCfg_addBackgrounds_LeptonFakeRate(self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job_leptonFR])

    # create configuration files to run 'addBackgrounds_Convs_LeptonFakeRate'
    key_addBackgrounds_job_conv = getKey('')
    self.jobOptions_addBackgrounds_Convs_LeptonFakeRate[key_addBackgrounds_job_conv] = {
      'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
      'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], os.path.basename(self.cfgFile_addBackgrounds_Convs_LeptonFakeRate)),
      'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackground_Convs_LeptonFakeRate.root"),
      'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(self.cfgFile_addBackgrounds_Convs_LeptonFakeRate.replace("_cfg.py", ".log")) ),
    }
    self.createCfg_addBackgrounds_Convs_LeptonFakeRate(self.jobOptions_addBackgrounds_Convs_LeptonFakeRate[key_addBackgrounds_job_conv])


    # initialize input and output file names for hadd_stage2
    key_hadd_stage2_dir = getKey("hadd")
    key_hadd_stage2_job = getKey('')
    if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
      self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
    # CV: hadd_stage_1_5 output file does not need to be added as input for hadd_stage_2,
    #     as addBackgrounds_LeptonFakeRate output file contains all histograms except fakes_mc
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_sum]['outputFile'])
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job_leptonFR]['outputFile'])
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_Convs_LeptonFakeRate[key_addBackgrounds_job_conv]['outputFile'])
    self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST], "hadd_stage2.root")

    # We need to generate the eta and pt bins for electrons and muons
    lepton_bins = {}
    categories = []
    for lepton in ['electron', 'muon']:
      if lepton not in lepton_bins:
        lepton_bins[lepton] = {}

      absEtaBins = None
      ptBins = None
      lepton_short = None
      if lepton == 'electron':
        absEtaBins = self.absEtaBins_e
        ptBins = self.ptBins_e
        lepton_short = 'e'
      elif lepton == 'muon':
        absEtaBins = self.absEtaBins_mu
        ptBins = self.ptBins_mu
        lepton_short = 'mu'
      else:
        raise ValueError('Invalid lepton type: %s' % lepton)
      for selection in ['tight', 'fakeable']:
        if selection not in lepton_bins[lepton]:
          lepton_bins[lepton][selection] = []
        num_or_den = None
        if selection == 'tight':
          num_or_den = 'numerator'
        elif selection == 'fakeable':
          num_or_den = 'denominator'
        else:
          raise ValueError('Invalid lepton selection: %s' % selection)
        for absEtaBin_idx in range(0, len(absEtaBins) - 1):
          absEtaBinLowerEdge = absEtaBins[absEtaBin_idx]
          absEtaBinUpperEdge = absEtaBins[absEtaBin_idx + 1]
          absEtaBinString = getEtaBin(absEtaBinLowerEdge, absEtaBinUpperEdge)
          for ptBin_idx in range(0, len(ptBins) - 1):
            ptBinsLowerEdge = ptBins[ptBin_idx]
            ptBinsUpperEdge = ptBins[ptBin_idx + 1]
            ptBinString = getPtBin(ptBinsLowerEdge, ptBinsUpperEdge)
            absEta_and_ptBinString = '%s_%s' % (absEtaBinString, ptBinString)

            lepton_bins[lepton][selection].append(
              construct_lepton_params(
                lepton, lepton_short, selection, absEta_and_ptBinString,
                error_msg = "No fit parameter range specified for abs(eta) range = (%.3f, %.3f) and "
                            "pT range = (%.3f, %.3f) for lepton type '%s' !!" % \
                            (absEtaBinLowerEdge, absEtaBinUpperEdge, ptBinsLowerEdge, ptBinsUpperEdge, lepton)
              ) + (absEtaBinLowerEdge, absEtaBinUpperEdge, ptBinsLowerEdge, ptBinsUpperEdge, 0)
            )

            categories.append(
              (
                "LeptonFakeRate/%s/%ss_%s/%s/%s" % (num_or_den, lepton, selection, absEtaBinString, ptBinString),
                "%ss_%s_%s_shapes" % (lepton, selection, absEta_and_ptBinString),
              )
            )

        # Let's also add inclusive category
        lepton_bins[lepton][selection].append(
          construct_lepton_params(
            lepton, lepton_short, selection, 'incl',
            error_msg = "No fit parameter range specified for lepton type %s" % lepton
          ) + (-1., -1., -1., -1., 1)
        )
        categories.append(
          (
            "LeptonFakeRate/%s/%ss_%s/incl" % (num_or_den, lepton, selection),
            "%ss_%s_incl_shapes" % (lepton, selection),
          )
        )
    lepton_bins_merged = []
    for lepton_type in lepton_bins:
      for lepton_selection in lepton_bins[lepton_type]:
        lepton_bins_merged.extend(lepton_bins[lepton_type][lepton_selection])

    if self.prep_dcard:
      logging.info("Creating configuration files to run 'prepareDatacards_LeptonFakeRate'")
      datacards = []
      for histogramToFit in self.histograms_to_fit:
        key_prep_dcard_dir = getKey("prepareDatacards")
        key_prep_dcard_job = getKey(histogramToFit)
        datacard = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s.root" % (histogramToFit))
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_LeptonFakeRate_%s_cfg.py" % histogramToFit),
          'datacardFile' : datacard,
          'histogramDir' : (self.histogramDir_prep_dcard),
          'histogramToFit' : histogramToFit,
          'label' : None,
          'categories' : categories,
        }
        datacards.append(datacard)
        self.createCfg_prep_dcard_LeptonFakeRate(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # Create setupDatacards_LeptonFakeRate.py script from the template
      systematics_leptonFR = []
      for systematic in self.central_or_shifts:
        if systematic == 'central':
          continue
        systematic_name = systematic.replace('Up', '').replace('Down', '')
        if systematic_name not in systematics_leptonFR:
          systematics_leptonFR.append(systematic_name)
      setup_dcards_template_file = os.path.join(jinja_template_dir, 'setupDatacards_LeptonFakeRate.py.template')
      with open(setup_dcards_template_file, 'r') as setup_dcards_template_file_ptr:
        setup_dcards_template = setup_dcards_template_file_ptr.read()
      setup_dcards_script = jinja2.Template(setup_dcards_template).render(
        leptons           = lepton_bins_merged,
        central_or_shifts = systematics_leptonFR,
        signal_process    = "QCD" if self.use_QCD_fromMC else "data_fakes",
      )
      setup_dcards_script_path = os.path.join(self.dirs[DKEY_SCRIPTS], 'setupDatacards_LeptonFakeRate.py')
      logging.debug("writing setupDatacards_LeptonFakeRate script file = '%s'" % setup_dcards_script_path)
      with codecs.open(setup_dcards_script_path, "w", "utf-8") as setup_dcards_script_file:
        setup_dcards_script_file.write(setup_dcards_script)
        setup_dcards_script_file.flush()
        os.fsync(setup_dcards_script_file.fileno())
      add_chmodX(setup_dcards_script_path)

      if self.use_QCD_fromMC:
          postfit_plot_script_path = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/postFitPlot_fakes_from_mc.py')
          yieldtable_script_path   = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/yieldTable_fakes_from_mc.py')
      else:
          postfit_plot_script_path = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/postFitPlot_fakes_from_data.py')
          yieldtable_script_path   = os.path.join(os.environ['CMSSW_BASE'], 'src/tthAnalysis/HiggsToTauTau/data/leptonFR/scripts/yieldTable_fakes_from_data.py')

      # Create run_postFit.sh script from the template
      combine_output_dir = os.path.join(self.dirs[DKEY_COMBINE_OUTPUT], 'output')
      postfit_template_file = os.path.join(jinja_template_dir, 'run_postFit.sh.template')
      with open(postfit_template_file, 'r') as postfit_template_file_ptr:
        postfit_template = postfit_template_file_ptr.read()
      for lepton in ['electron', 'muon']:
        for selection in ['fakeable', 'tight']:
          is_num = selection == 'tight'
          for params in lepton_bins[lepton][selection]:
            l_array, l_range, l_sub_dir, l_eta_low, l_eta_high, l_pt_low, l_pt_high, l_is_inclusive = params
            postfit_script = jinja2.Template(postfit_template).render(
              new_cmssw_base         = self.cmssw_base_dir_combine,
              setup_dcards_script    = setup_dcards_script_path,
              postfit_plot_script    = postfit_plot_script_path,
              int_lumi_data          = self.lumi,
              yieldtable_script      = yieldtable_script_path,
              output_dir             = combine_output_dir,
              numerator_plotLabel    = self.numerator_plotLabel,
              denominator_plotLabel  = self.denominator_plotLabel,
              l_array                = l_array,
              l_range                = l_range,
              l_sub_dir              = l_sub_dir,
              l_eta_low              = l_eta_low,
              l_eta_high             = l_eta_high,
              l_pt_low               = l_pt_low,
              l_pt_high              = l_pt_high,
              l_is_inclusive         = l_is_inclusive,
              is_num                 = is_num,
              numerator_output_dir   = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.numerator_histogram),
              denominator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.denominator_histogram),
              selection              = selection,
              lepton_letter          = 'e' if lepton == 'electron' else 'mu',
              grep_value             = "QCD" if self.use_QCD_fromMC else "data_fakes",
            )
            postfit_script_path = os.path.join(
              self.dirs[DKEY_SCRIPTS],
              'mlfit_%s_%s.sh' % (self.numerator_histogram if is_num else self.denominator_histogram, l_array)
            )
            logging.debug("Writing run_postFit script file = '%s'" % postfit_script_path)
            with codecs.open(postfit_script_path, "w", "utf-8") as postfit_script_file:
              postfit_script_file.write(postfit_script)
              postfit_script_file.flush()
              os.fsync(postfit_script_file.fileno())
            add_chmodX(postfit_script_path)

      key_prep_dcard_dir = getKey("prepareDatacards")
      fit_value_file = os.path.join(combine_output_dir, 'fit_values.txt')
      makefile_template_file = os.path.join(jinja_template_dir, 'Makefile_postFit.template')
      makefile_template = open(makefile_template_file, 'r').read()
      makefile_templatized = jinja2.Template(makefile_template).render(
        new_cmssw_base = self.cmssw_base_dir_combine,
        setup_dcards_script = setup_dcards_script_path,
        numerator_histogram = self.numerator_histogram,
        denominator_histogram = self.denominator_histogram,
        scripts_dir = self.dirs[DKEY_SCRIPTS],
        numerator_datacard = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s.root" % self.numerator_histogram),
        denominator_datacard = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s.root" % self.denominator_histogram),
        output_dir = combine_output_dir,
        numerator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.numerator_histogram),
        denominator_output_dir = os.path.join(combine_output_dir, 'mlfit_LeptonFakeRate_%s' % self.denominator_histogram),
        lepton_bins = lepton_bins,
        fit_values = fit_value_file,
      )
      makefile_path = os.path.join(self.dirs[DKEY_SCRIPTS], 'Makefile_postFit')
      logging.debug("Writing run_postFit script file = '%s'" % makefile_path)
      with codecs.open(makefile_path, "w", "utf-8") as makefile_path_file:
        makefile_path_file.write(makefile_templatized)
        makefile_path_file.flush()
        os.fsync(makefile_path_file.fileno())

      self.jobOptions_combine = {
        'inputFile'       : ' '.join(datacards),
        'outputFile'      : fit_value_file,
        'makefile_path'   : makefile_path,
        'logFile'         : os.path.join(self.dirs[DKEY_LOGS], 'postFit.log'),
      }

      key_comp_LeptonFakeRate = getKey('')
      leptonFR_final_output = os.path.join(combine_output_dir, 'leptonFakeRates.root')
      self.jobOptions_comp_LeptonFakeRate[key_comp_LeptonFakeRate] = {
        'inputFile'            : [ fit_value_file, self.outputFile_hadd_stage2[key_hadd_stage2_job] ],
        'outputFile'           : leptonFR_final_output,
        'absEtaBins_e'         : self.absEtaBins_e,
        'ptBins_e'             : self.ptBins_e,
        'absEtaBins_mu'        : self.absEtaBins_mu,
        'ptBins_mu'            : self.ptBins_mu,
        'logFile'              : os.path.join(self.dirs[DKEY_LOGS], os.path.basename(self.cfgFile_comp_LeptonFakeRate).replace('_cfg.py', '.log')),
        'cfgFile_modified'     : os.path.join(self.dirs[DKEY_CFGS], os.path.basename(self.cfgFile_comp_LeptonFakeRate)),
        'plots_outputFileName' : os.path.join(self.dirs[DKEY_PLOT], "comp_LeptonFakeRate.png")
      }
      self.createCfg_comp_LeptonFakeRate(self.jobOptions_comp_LeptonFakeRate[key_comp_LeptonFakeRate])
      self.targets.append(self.jobOptions_comp_LeptonFakeRate[key_comp_LeptonFakeRate]['outputFile'])

    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_LeptonFakeRate.py")
    self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_LeptonFakeRate.py")
    self.sbatchFile_addBackgrounds_LeptonFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_LeptonFakeRate.py")
    self.sbatchFile_addBackgrounds_Convs_LeptonFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_Convs_LeptonFakeRate.py")
    self.sbatchFile_comp_LeptonFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_comp_LeptonFakeRate.py")
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      self.createScript_sbatch(self.executable_addBackgrounds_recursively, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      self.createScript_sbatch(self.executable_addBackgrounds_LeptonFakeRate, self.sbatchFile_addBackgrounds_LeptonFakeRate, self.jobOptions_addBackgrounds_LeptonFakeRate)
      self.createScript_sbatch(self.executable_addBackgrounds_LeptonFakeRate, self.sbatchFile_addBackgrounds_Convs_LeptonFakeRate, self.jobOptions_addBackgrounds_Convs_LeptonFakeRate)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_comp_LeptonFakeRate)
      self.createScript_sbatch(self.executable_comp_LeptonFakeRate, self.sbatchFile_comp_LeptonFakeRate, self.jobOptions_comp_LeptonFakeRate)

    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile) ## this step now does both e Conv, data_fakes and fakes_mc computation
    # self.addToMakefile_backgrounds_from_MC(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile, make_dependency = " ".join([
      "phony_addBackgrounds_LeptonFakeRate", "phony_addBackgrounds_Convs_LeptonFakeRate", "phony_addBackgrounds_sum"
    ]))
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_combine(lines_makefile)
    self.addToMakefile_comp_LeptonFakeRate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
Exemplo n.º 15
0
    def __init__(
        self,
        configDir,
        outputDir,
        cfgFile_prodNtuple,
        samples,
        max_files_per_job,
        era,
        preselection_cuts,
        leptonSelection,
        hadTauWP,
        check_output_files,
        running_method,
        version,
        num_parallel_jobs,
        pileup,
        golden_json,
        dry_run,
        isDebug,
        gen_matching_by_index,
        use_nonnominal,
        use_home,
        skip_tools_step,
        verbose=False,
        pool_id='',
    ):

        self.configDir = configDir
        self.outputDir = outputDir
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.preselection_cuts = preselection_cuts
        self.leptonSelection = leptonSelection
        self.hadTauWP = hadTauWP
        self.check_output_files = check_output_files
        self.verbose = verbose
        self.dry_run = dry_run
        self.isDebug = isDebug
        self.gen_matching_by_index = gen_matching_by_index
        self.use_nonnominal = use_nonnominal
        self.use_home = use_home
        self.pileup = pileup
        self.golden_json = golden_json
        if running_method.lower() not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)

        if not os.path.isfile(self.pileup):
            raise ValueError('No such file: %s' % self.pileup)
        self.pileup_histograms = get_pileup_histograms(self.pileup)

        if not os.path.isfile(self.golden_json):
            raise ValueError('No such file: %s' % self.golden_json)

        self.running_method = running_method
        self.is_sbatch = self.running_method.lower() == "sbatch"
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(self.configDir, "Makefile_prodNtuple")
        self.num_parallel_jobs = num_parallel_jobs
        self.skip_tools_step = skip_tools_step
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: %s" % self.workingDir)
        self.template_dir = os.path.join(os.getenv('CMSSW_BASE'), 'src',
                                         'tthAnalysis', 'HiggsToTauTau',
                                         'test', 'templates')
        logging.info("Templates directory is: %s" % self.template_dir)

        self.version = version
        self.samples = samples

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.outputDir)
        self.stdout_file_path = os.path.join(self.configDir,
                                             "stdout_prodNtuple.log")
        self.stderr_file_path = os.path.join(self.configDir,
                                             "stderr_prodNtuple.log")
        self.sw_ver_file_cfg = os.path.join(self.configDir,
                                            "VERSION_prodNtuple.log")
        self.sw_ver_file_out = os.path.join(self.outputDir,
                                            "VERSION_prodNtuple.log")
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out = get_log_version(
            (self.stdout_file_path, self.stderr_file_path,
             self.sw_ver_file_cfg, self.sw_ver_file_out))

        self.cfgFile_prodNtuple_original = os.path.join(
            self.template_dir, cfgFile_prodNtuple)
        self.sbatchFile_prodNtuple = os.path.join(self.configDir,
                                                  "sbatch_prodNtuple.py")
        self.cfgFiles_prodNtuple_modified = {}
        self.logFiles_prodNtuple = {}

        self.inputFiles = {}
        self.outputFiles = {}
        self.filesToClean = []
        self.dirs = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, process_name)
        for dir_type in [DKEY_CFGS, DKEY_LOGS]:
            initDict(self.dirs, [dir_type])
            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type)

        self.cvmfs_error_log = {}
        self.executable = "produceNtuple.sh"
Exemplo n.º 16
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          for hadTau_charge_selection in self.hadTau_charge_selections:
            key_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
              initDict(self.dirs, [ key_dir, dir_type ])
              if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                  "_".join([ lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection ]), process_name)
              else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                  "_".join([ lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection ]), process_name)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    ##print "self.dirs = ", self.dirs

    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
      else:
        create_if_not_exists(self.dirs[key])

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_name, sample_info, self.max_files_per_job, self.debug)

    for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
      lepton_selection = lepton_and_hadTau_selection
      if self.applyFakeRateWeights == "2tau":
        lepton_selection = "Tight"
      hadTau_selection = "|".join([ lepton_and_hadTau_selection, self.hadTau_selection_part2 ])

      if lepton_and_hadTau_selection == "forBDTtraining":
        lepton_selection = "Loose"
        hadTau_selection = "Tight|%s" % self.hadTau_selection_relaxed

      for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
        if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
          continue
        if lepton_and_hadTau_frWeight == "disabled" and not lepton_and_hadTau_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)

        for hadTau_charge_selection in self.hadTau_charge_selections:

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            is_signal = (sample_category == "signal")

            for central_or_shift in self.central_or_shifts:

              inputFileList = inputFileLists[sample_name]
              for jobId in inputFileList.keys():
                if central_or_shift != "central":
                  isFR_shape_shift = False
                  for FR_shape_shift in [
                    "CMS_ttHl_FRe_shape",
                    "CMS_ttHl_FRm_shape",
                    "CMS_ttHl_FRjt_norm",
                    "CMS_ttHl_FRjt_shape" ]:
                    if central_or_shift.find(FR_shape_shift) != -1:
                      isFR_shape_shift = True
                  if not ((lepton_and_hadTau_selection == "Fakeable" and hadTau_charge_selection == "OS" and isFR_shape_shift) or
                          (lepton_and_hadTau_selection == "Tight"    and hadTau_charge_selection == "OS")):
                    continue
                  if not is_mc and not isFR_shape_shift:
                    continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttH") and sample_category != "signal":
                  continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttW") and sample_category != "TTW":
                  continue
                if central_or_shift.startswith("CMS_ttHl_thu_shape_ttZ") and sample_category != "TTZ":
                  continue
                ##print "processing sample %s: jobId = %i, central_or_shift = '%s'" % (process_name, jobId, central_or_shift)

                # build config files for executing analysis code
                key_dir = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                key_analyze_job = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (key_file, ntupleFiles)
                  continue

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles' : ntupleFiles,
                  'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%s_%i_cfg.py" % \
                    (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)),
                  'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%s_%i.root" % \
                    (process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)),
                  'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%s_%i.log" % \
                    (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)),
                  'rleOutputFile' : os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%s_%i.txt" % \
                    (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection, central_or_shift, jobId)) if self.select_rle_output else "",
                  'sample_category' : sample_category,
                  'process_name_specific' : sample_info["process_name_specific"],
                  'triggers' : sample_info["triggers"],
                  'lepton_selection' : lepton_selection,
                  'apply_leptonGenMatching' : self.apply_leptonGenMatching,
                  'hadTau_selection' : hadTau_selection,
                  'apply_hadTauGenMatching' : self.apply_hadTauGenMatching,
                  'hadTau_charge_selection' : hadTau_charge_selection,
                  'applyFakeRateWeights' : self.applyFakeRateWeights if not (lepton_selection == "Tight" and hadTau_selection.find("Tight") != -1) else "disabled",
                  ##'use_HIP_mitigation_bTag' : sample_info["use_HIP_mitigation_bTag"],
                  ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
                  'use_HIP_mitigation_bTag' : True,
                  'use_HIP_mitigation_mediumMuonId' : True,
                  'is_mc' : is_mc,
                  'central_or_shift' : central_or_shift,
                  'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                  'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                  'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                  'selectBDT': self.isBDTtraining,
                  'changeBranchNames' : self.changeBranchNames
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s_%s.root" % \
                    (self.channel, process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

            if self.isBDTtraining:
              continue

            if is_mc:
              logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)

              sample_categories = [ sample_category ]
              if is_signal:
                sample_categories = [ "signal", "ttH", "ttH_htt", "ttH_hww", "ttH_hzz" ]
              for sample_category in sample_categories:
                # sum non-fake contributions for each MC sample separately
                # input processes: TT2t0e0m0j, TT1t1e0m0j, TT1t0e1m0j", TT0t2e0m0j, TT0t1e1m0j, TT0t0e2m0j; TTW2t0e0m0j,...
                # output processes: TT; ...
                key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                key_addBackgrounds_job = getKey(process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                processes_input = None
                if sample_category in [ "signal" ]:
                  lepton_and_hadTau_genMatches = []
                  lepton_and_hadTau_genMatches.extend(self.lepton_and_hadTau_genMatches_nonfakes)
                  lepton_and_hadTau_genMatches.extend(self.lepton_and_hadTau_genMatches_fakes)
                  processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in lepton_and_hadTau_genMatches ]
                elif sample_category in [ "ttH" ]:
                  processes_input = []
                  processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ])
                  processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ])
                  processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ])
                else:
                  processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_nonfakes ]
                if processes_input:
                  self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1],
                    'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_%s_cfg.py" % \
                      (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgrounds_%s_%s_%s_%s_%s.root" % \
                      (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgrounds_%s_%s_%s_%s_%s.log" % \
                      (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'categories' : [ getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection) ],
                    'processes_input' : processes_input,
                    'process_output' : sample_category
                  }
                  self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                  # initialize input and output file names for hadd_stage1_5
                  key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                  if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
                  self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                  self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_5_%s_%s_%s.root" % \
                    (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

                # sum fake contributions for each MC sample separately
                # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
                # output processes: TT_fake; ...
                key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                key_addBackgrounds_job = getKey(process_name, "%s_fake" % sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                processes_input = None
                if sample_category in [ "signal" ]:
                  processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ]
                elif sample_category in [ "ttH" ]:
                  processes_input = []
                  processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
                  processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
                  processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ])
                else:
                  processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_and_hadTau_genMatches_fakes ]
                if processes_input:
                  self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                    'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1],
                    'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgrounds_%s_fakes_%s_%s_%s_%s_cfg.py" % \
                      (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgrounds_%s_fakes_%s_%s_%s_%s.root" % \
                      (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgrounds_%s_fakes_%s_%s_%s_%s.log" % \
                      (self.channel, process_name, sample_category, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
                    'categories' : [ getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection) ],
                    'processes_input' : processes_input,
                    'process_output' : "%s_fake" % sample_category
                  }
                  self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                  # initialize input and output file names for hadd_stage1_5
                  key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
                  if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
                  self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                  self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_5_%s_%s_%s.root" % \
                    (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

            if self.isBDTtraining:
              continue

            # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
            if not is_mc:
              key_hadd_stage1 = getKey(process_name, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
              key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
              if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(self.outputFile_hadd_stage1[key_hadd_stage1])

          if self.isBDTtraining:
            continue

          # sum fake contributions for the total of all MC sample
          # input processes: TT1t0e0m1j, TT0t1e0m1j, TT0t0e1m1j, TT0t0e0m2j; TTW1t0e0m1j,...
          # output process: fakes_mc
          key_addBackgrounds_job = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          sample_categories.extend([ "signal" ])
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
            'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgrounds_%s_fakes_mc_%s_%s_cfg.py" % \
              (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
            'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgrounds_%s_fakes_mc_%s_%s.root" % \
              (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
            'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgrounds_%s_fakes_mc_%s_%s.log" % \
              (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)),
            'categories' : [ getHistogramDir(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, hadTau_charge_selection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job])

          # initialize input and output file names for hadd_stage2
          key_hadd_stage2 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
          if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2] = []
          if lepton_and_hadTau_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job]['outputFile'])
          key_hadd_stage1_5 = getKey(lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection)
          self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5])
          self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s_%s.root" % \
            (self.channel, lepton_and_hadTau_selection_and_frWeight, hadTau_charge_selection))

    if self.isBDTtraining:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_analyze(lines_makefile)
      self.addToMakefile_hadd_stage1(lines_makefile)
      self.createMakefile(lines_makefile)
      logging.info("Done")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for hadTau_charge_selection in self.hadTau_charge_selections:
      key_addFakes_job = getKey("fakes_data", hadTau_charge_selection)
      key_hadd_stage1_5 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Fakeable", "enabled"), hadTau_charge_selection)
      category_sideband = None
      if self.applyFakeRateWeights == "3L":
        category_sideband = "1l_2tau_%s_Fakeable_wFakeRateWeights" % hadTau_charge_selection
      elif self.applyFakeRateWeights == "2tau":
        category_sideband = "1l_2tau_%s_Fakeable_wFakeRateWeights" % hadTau_charge_selection
      else:
        raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % applyFakeRateWeights)
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
        'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "addBackgroundLeptonFakes_%s_%s_cfg.py" % \
          (self.channel, hadTau_charge_selection)),
        'outputFile' : os.path.join(self.dirs[DKEY_HIST], "addBackgroundLeptonFakes_%s_%s.root" % \
          (self.channel, hadTau_charge_selection)),
        'logFile' : os.path.join(self.dirs[DKEY_LOGS], "addBackgroundLeptonFakes_%s_%s.log" % \
          (self.channel, hadTau_charge_selection)),
        'category_signal' : "1l_2tau_%s_Tight" % hadTau_charge_selection,
        'category_sideband' : category_sideband
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), hadTau_charge_selection)
      self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      key_prep_dcard_job = getKey(histogramToFit)
      key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
        'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % (self.channel, histogramToFit)),
        'datacardFile' : os.path.join(self.dirs[DKEY_DCRD], "prepareDatacards_%s_%s.root" % (self.channel, histogramToFit)),
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])
      if "SS" in self.hadTau_charge_selections:
        key_prep_dcard_job = getKey(histogramToFit, "SS")
        key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS")
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
          'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "prepareDatacards_%s_SS_%s_cfg.py" % (self.channel, histogramToFit)),
          'datacardFile' : os.path.join(self.dirs[DKEY_DCRD], "prepareDatacards_%s_SS_%s.root" % (self.channel, histogramToFit)),
          'histogramDir' : self.histogramDir_prep_dcard_SS,
          'histogramToFit' : histogramToFit,
          'label' : 'SS'
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_job = getKey("OS")
    key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
    self.jobOptions_make_plots[key_makePlots_job] = {
      'executable' : self.executable_make_plots,
      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
      'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
      'outputFile' : os.path.join(self.dirs[DKEY_PLOT], "makePlots_%s.png" % self.channel),
      'histogramDir' : self.histogramDir_prep_dcard,
      'label' : None,
      'make_plots_backgrounds' : self.make_plots_backgrounds
    }
    self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.hadTau_charge_selections:
      key_makePlots_job = getKey("SS")
      key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "SS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
        'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard_SS,
        'label' : "SS",
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_and_hadTau_selections:
      key_makePlots_job = getKey("OS")
      key_hadd_stage2 = getKey(get_lepton_and_hadTau_selection_and_frWeight("Tight", "disabled"), "OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
        'cfgFile_modified' : os.path.join(self.dirs[DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
Exemplo n.º 17
0
    def __init__(
        self,
        configDir,
        localDir,
        outputDir,
        output_file,
        executable,
        samples,
        max_files_per_job,
        era,
        binning,
        use_gen_weight,
        check_output_files,
        running_method,
        num_parallel_jobs,
        pool_id='',
        verbose=False,
        dry_run=False,
        use_home=False,
        keep_logs=False,
        submission_cmd=None,
    ):

        self.configDir = configDir
        self.localDir = localDir
        self.outputDir = outputDir
        self.executable = executable
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.binning = binning
        self.use_gen_weight = use_gen_weight
        self.check_output_files = check_output_files
        self.verbose = verbose
        self.dry_run = dry_run
        self.use_home = use_home
        self.keep_logs = keep_logs
        if running_method.lower() not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)

        self.running_method = running_method
        self.is_sbatch = self.running_method.lower() == "sbatch"
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(self.localDir, "Makefile_nonResDenom")
        self.num_parallel_jobs = num_parallel_jobs
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: %s" % self.workingDir)
        self.template_dir = os.path.join(os.getenv('CMSSW_BASE'), 'src',
                                         'tthAnalysis', 'HiggsToTauTau',
                                         'test', 'templates')
        logging.info("Templates directory is: %s" % self.template_dir)

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.localDir)
        create_if_not_exists(self.outputDir)
        self.output_file = os.path.join(self.outputDir, output_file)
        self.stdout_file_path = os.path.join(self.localDir,
                                             "stdout_nonResDenom.log")
        self.stderr_file_path = os.path.join(self.localDir,
                                             "stderr_nonResDenom.log")
        self.sw_ver_file_cfg = os.path.join(self.localDir,
                                            "VERSION_nonResDenom.log")
        self.sw_ver_file_out = os.path.join(self.outputDir,
                                            "VERSION_nonResDenom.log")
        self.submission_out = os.path.join(self.localDir,
                                           "SUBMISSION_nonResDenom.log")
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out = get_log_version(
            (self.stdout_file_path, self.stderr_file_path,
             self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out))
        check_submission_cmd(self.submission_out, submission_cmd)

        self.sbatchFile_nonResDenom = os.path.join(self.localDir,
                                                   "sbatch_nonResDenom.py")
        self.cfgFiles_nonResDenom = {}
        self.logFiles_nonResDenom = {}
        self.scriptFiles_nonResDenom = {}
        self.jobOptions_sbatch = {}

        self.inputFiles = {}
        self.outputFiles_tmp = {}
        self.outputFiles = {}

        self.phoniesToAdd = []
        self.filesToClean = [self.configDir]
        self.targets = []

        self.dirs = {}
        all_dirs = [
            DKEY_CFGS, DKEY_HISTO_TMP, DKEY_HISTO, DKEY_PLOTS, DKEY_LOGS,
            DKEY_SCRIPTS, DKEY_HADD_RT
        ]
        cfg_dirs = [
            DKEY_CFGS, DKEY_LOGS, DKEY_PLOTS, DKEY_SCRIPTS, DKEY_HADD_RT
        ]

        self.gen_weights = {}
        if self.use_gen_weight:
            ref_genweights = os.path.join(os.environ['CMSSW_BASE'], 'src',
                                          'tthAnalysis', 'HiggsToTauTau',
                                          'data',
                                          'refGenWeight_{}.txt'.format(era))
            with open(ref_genweights, 'r') as f:
                for line in f:
                    line_split = line.strip().split()
                    assert (len(line_split) == 2)
                    sample_name = line_split[0]
                    ref_genweight = float(line_split[1])
                    assert (sample_name not in self.gen_weights)
                    self.gen_weights[sample_name] = ref_genweight

        for sample_name, sample_info in self.samples.items():
            if not sample_info['use_it']:
                continue
            process_name = sample_info["process_name_specific"]
            if self.use_gen_weight:
                assert (re.sub('_duplicate$', '', process_name)
                        in self.gen_weights)
            key_dir = getKey(process_name)
            for dir_type in all_dirs:
                if dir_type == DKEY_PLOTS:
                    continue
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in cfg_dirs:
                    dir_choice = self.configDir if dir_type == DKEY_CFGS else self.localDir
                    self.dirs[key_dir][dir_type] = os.path.join(
                        dir_choice, dir_type, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, process_name)
        for dir_type in cfg_dirs:
            initDict(self.dirs, [dir_type])
            dir_choice = self.configDir if dir_type == DKEY_CFGS else self.localDir
            self.dirs[dir_type] = os.path.join(dir_choice, dir_type)
            if dir_choice != self.configDir:
                self.filesToClean.append(self.dirs[dir_type])

        self.cvmfs_error_log = {}
        self.num_jobs = {
            'hadd': 0,
            'nonResDenom': 0,
            'plot': 0,
        }
Exemplo n.º 18
0
    def __init__(self,
                 treeName,
                 outputDir,
                 cfgDir,
                 executable_addMEM,
                 samples,
                 era,
                 debug,
                 running_method,
                 max_files_per_job,
                 mem_integrations_per_job,
                 max_mem_integrations,
                 num_parallel_jobs,
                 leptonSelection,
                 hadTauSelection,
                 isForBDTtraining,
                 channel,
                 pool_id=''):

        self.treeName = treeName
        self.outputDir = outputDir
        self.cfgDir = cfgDir
        self.executable_addMEM = executable_addMEM
        self.mem_integrations_per_job = mem_integrations_per_job
        self.max_files_per_job = max_files_per_job
        self.max_mem_integrations = max_mem_integrations
        self.samples = samples
        self.era = era
        self.debug = debug
        self.channel = channel
        self.leptonSelection = leptonSelection
        self.hadTauSelection = hadTauSelection
        self.hadTauDefinition = self.hadTauSelection.split('|')[0]
        self.hadTauWorkingPoint = self.hadTauSelection.split('|')[1]
        self.maxPermutations_branchName = "maxPermutations_addMEM_%s_lep%s_tau%s_%s" % (
            self.channel,
            self.leptonSelection,
            self.hadTauDefinition,
            self.hadTauWorkingPoint,
        )
        self.isForBDTtraining = isForBDTtraining
        if running_method.lower() not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)
        self.running_method = running_method
        self.is_sbatch = False
        self.is_makefile = False
        if self.running_method.lower() == "sbatch":
            self.is_sbatch = True
        else:
            self.is_makefile = True
        self.makefile = os.path.join(self.cfgDir, "Makefile_%s" % self.channel)
        self.num_parallel_jobs = num_parallel_jobs
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: {workingDir}".format(
            workingDir=self.workingDir))

        for dirPath in [self.outputDir, self.cfgDir]:
            create_if_not_exists(dirPath)
        self.stdout_file = codecs.open(
            os.path.join(self.cfgDir, "stdout_%s.log" % self.channel), 'w',
            'utf-8')
        self.stderr_file = codecs.open(
            os.path.join(self.cfgDir, "stderr_%s.log" % self.channel), 'w',
            'utf-8')
        self.dirs = {}
        self.samples = samples
        self.cfgFiles_addMEM_modified = {}
        self.shFiles_addMEM_modified = {}
        self.logFiles_addMEM = {}
        self.sbatchFile_addMEM = os.path.join(
            self.cfgDir, "sbatch_addMEM_%s.py" % self.channel)
        self.inputFiles = {}
        self.outputFiles = {}
        self.hadd_records = {}
        self.filesToClean = []

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or \
                sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [DKEY_NTUPLES, DKEY_FINAL_NTUPLES]:
                initDict(self.dirs, [key_dir, dir_type])
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.outputDir, dir_type, self.channel, process_name)
            for dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_HADD, DKEY_HADD_RT]:
                initDict(self.dirs, [key_dir, dir_type])
                self.dirs[key_dir][dir_type] = os.path.join(
                    self.cfgDir, dir_type, self.channel, process_name)

        self.cvmfs_error_log = {}
Exemplo n.º 19
0
  def __init__(self, outputDir, executable_analyze, cfgFile_analyze_original, samples, lepton_charge_selections, hadTau_selection, applyFakeRateWeights, central_or_shifts,
               max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
               executable_addBackgrounds, executable_addFakes, executable_addFlips, histograms_to_fit, select_rle_output = False, executable_prep_dcard="prepareDatacard"):
    analyzeConfig.__init__(self, outputDir, executable_analyze, "2lss_1tau", central_or_shifts,
      max_files_per_job, era, use_lumi, lumi, debug, running_method, num_parallel_jobs, 
      histograms_to_fit)

    self.samples = samples

    self.lepton_and_hadTau_selections = [ "Tight", "Fakeable", "Fakeable_mcClosure" ]
    self.lepton_and_hadTau_frWeights = [ "enabled", "disabled" ]
    self.lepton_charge_selections = lepton_charge_selections
    self.hadTau_selection_part2 = hadTau_selection
    self.applyFakeRateWeights = applyFakeRateWeights
        
    self.lepton_genMatches = [ "2l0j", "1l1j", "0l2j" ]
    self.hadTau_genMatches = [ "1t0e0m0j", "0t1e0m0j", "0t0e1m0j", "0t0e0m1j" ]

    self.apply_leptonGenMatching = None
    self.apply_hadTauGenMatching = None
    self.lepton_and_hadTau_genMatches_nonfakes = []
    self.lepton_and_hadTau_genMatches_fakes = []
    if self.applyFakeRateWeights == "3L":
      self.apply_leptonGenMatching = True
      self.apply_hadTauGenMatching = True
      for lepton_genMatch in self.lepton_genMatches:
        for hadTau_genMatch in self.hadTau_genMatches:
          lepton_and_hadTau_genMatch = "&".join([ lepton_genMatch, hadTau_genMatch ])
          if lepton_genMatch.endswith("0j") and hadTau_genMatch.endswith("0j"):
            self.lepton_and_hadTau_genMatches_nonfakes.append(lepton_and_hadTau_genMatch)
          else:
            self.lepton_and_hadTau_genMatches_fakes.append(lepton_and_hadTau_genMatch)
    elif applyFakeRateWeights == "2lepton":
      self.apply_leptonGenMatching = True
      self.apply_hadTauGenMatching = False
      for lepton_genMatch in self.lepton_genMatches:
        if lepton_genMatch.endswith("0j"):
          self.lepton_and_hadTau_genMatches_nonfakes.append(lepton_genMatch)
        else:
          self.lepton_and_hadTau_genMatches_fakes.append(lepton_genMatch)
    else:
      raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % applyFakeRateWeights)

    self.executable_addBackgrounds = executable_addBackgrounds    
    self.executable_addFakes = executable_addFakes
    self.executable_addFlips = executable_addFlips
    
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"] or sample_info["sample_category"] in [ "additional_signal_overlap", "background_data_estimate" ]:
        continue
      process_name = sample_info["process_name_specific"]
      for lepton_and_hadTau_selection in self.lepton_and_hadTau_selections:
        for lepton_and_hadTau_frWeight in self.lepton_and_hadTau_frWeights:
          if lepton_and_hadTau_frWeight == "enabled" and not lepton_and_hadTau_selection.startswith("Fakeable"):
            continue
          lepton_and_hadTau_selection_and_frWeight = get_lepton_and_hadTau_selection_and_frWeight(lepton_and_hadTau_selection, lepton_and_hadTau_frWeight)
          for lepton_charge_selection in self.lepton_charge_selections:
            key_dir = getKey(sample_name, lepton_and_hadTau_selection, lepton_and_hadTau_frWeight, lepton_charge_selection)
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
              initDict(self.dirs, [ key_dir, dir_type ])
              self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                "_".join([ lepton_and_hadTau_selection_and_frWeight, lepton_charge_selection ]), process_name)
    for dir_type in [ DKEY_DCRD, DKEY_PLOT ]:
      initDict(self.dirs, [ dir_type ])
      self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    ##print "self.dirs = ", self.dirs

    if self.applyFakeRateWeights == "3L":
      self.nonfake_backgrounds = [ "TT", "TTW", "TTZ", "EWK", "Rares" ]
      self.prep_dcard_processesToCopy = [ "data_obs", "TT", "TTW", "TTZ", "EWK", "Rares", "fakes_data", "fakes_mc", "flips_data" ]
      self.make_plots_backgrounds = [ "TT", "TTW", "TTZ", "EWK", "Rares", "fakes_data", "flips_data" ]
    elif applyFakeRateWeights == "2lepton":
      if era == '2015':
        for sample_name in [
          "/TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v2/MINIAODSIM",
          "/TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/ST_tW_top_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/ST_tW_antitop_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/ST_t-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/ST_t-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext1-v1/MINIAODSIM",
          "/ST_s-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v2/MINIAODSIM",
          "/WWTo2L2Nu_13TeV-powheg/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM",
          "/ZZTo4L_13TeV_powheg_pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM" ]:
          self.samples[sample_name]["sample_category"] = "background_data_estimate"
        self.samples["/WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8/RunIIFall15MiniAODv2-PU25nsData2015v1_76X_mcRun2_asymptotic_v12-v1/MINIAODSIM"]["sample_category"] = "WZ"        
      elif era == '2016':
        for sample_name in [ 
          '/TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/TTJets_SingleLeptFromT_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0_ext1-v1/MINIAODSIM',
          '/TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/TTJets_SingleLeptFromTbar_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0_ext1-v1/MINIAODSIM',
          '/TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v4/MINIAODSIM',
          '/TTJets_DiLept_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0_ext1-v1/MINIAODSIM',
          '/DYJetsToLL_M-10to50_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/DYJetsToLL_M-50_TuneCUETP8M1_13TeV-madgraphMLM-pythia8/RunIISpring16MiniAODv2-PUSpring16RAWAODSIM_reHLT_80X_mcRun2_asymptotic_v14_ext1-v1/MINIAODSIM',
          '/WJetsToLNu_TuneCUETP8M1_13TeV-amcatnloFXFX-pythia8/RunIISpring16MiniAODv2-PUSpring16RAWAODSIM_reHLT_80X_mcRun2_asymptotic_v14_ext1-v1/MINIAODSIM',
          '/ST_tW_antitop_5f_inclusiveDecays_13TeV-powheg-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/ST_tW_top_5f_NoFullyHadronicDecays_13TeV-powheg_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/ST_t-channel_antitop_4f_inclusiveDecays_13TeV-powhegV2-madspin-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/ST_t-channel_top_4f_inclusiveDecays_13TeV-powhegV2-madspin-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/ST_s-channel_4f_leptonDecays_13TeV-amcatnlo-pythia8_TuneCUETP8M1/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/WWTo2L2Nu_13TeV-powheg/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM',
          '/ZZTo4L_13TeV-amcatnloFXFX-pythia8/RunIISpring16MiniAODv2-PUSpring16RAWAODSIM_reHLT_80X_mcRun2_asymptotic_v14-v1/MINIAODSIM' ]:
          self.samples[sample_name]["sample_category"] = "background_data_estimate"
        self.samples["/WZTo3LNu_TuneCUETP8M1_13TeV-powheg-pythia8/RunIISpring16MiniAODv2-PUSpring16_80X_mcRun2_asymptotic_2016_miniAODv2_v0-v1/MINIAODSIM"]["sample_category"] = "WZ"
      else:
        raise ValueError("Invalid Configuration parameter 'era' = %s !!" % era)
      self.nonfake_backgrounds = [ "TTW", "TTZ", "WZ", "Rares" ]
      self.prep_dcard_processesToCopy = [ "data_obs", "TTW", "TTZ", "WZ", "Rares", "fakes_data", "fakes_mc", "flips_data" ]
      self.make_plots_backgrounds = [ "TTW", "TTZ", "WZ", "Rares", "fakes_data", "flips_data" ]
    else:
      raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % applyFakeRateWeights)
      
    self.cfgFile_analyze_original = os.path.join(self.workingDir, cfgFile_analyze_original)
    self.cfgFile_addBackgrounds_original = os.path.join(self.workingDir, "addBackgrounds_cfg.py")
    self.cfgFile_addBackgrounds_modified = {}
    self.histogramFile_addBackgrounds = {}
    self.histogramDir_addBackgrounds = {}    
    self.process_output_addBackgrounds = {}            
    self.histogramFile_hadd_stage1_5 = os.path.join(self.outputDir, DKEY_HIST, "histograms_harvested_stage1_5_%s.root" % self.channel)
    self.cfgFile_addFakes_original = os.path.join(self.workingDir, "addBackgroundLeptonFakes_cfg.py")
    self.cfgFile_addFakes_modified = {}
    self.histogramFile_addFakes = {}
    self.histogramFile_addFlips = os.path.join(self.outputDir, DKEY_HIST, "addBackgroundLeptonFlips_%s.root" % self.channel)
    self.cfgFile_addFlips_original = os.path.join(self.workingDir, "addBackgroundLeptonFlips_cfg.py")
    self.cfgFile_addFlips_modified = os.path.join(self.outputDir, DKEY_CFGS, "addBackgroundLeptonFlips_%s_cfg.py" % self.channel)
    self.histogramDir_prep_dcard = "2lss_1tau_SS_Tight"
    self.histogramDir_prep_dcard_OS = "2lss_1tau_OS_Tight"
    self.cfgFile_make_plots_original = os.path.join(self.workingDir, "makePlots_2lss_1tau_cfg.py")
    self.cfgFile_make_plots_mcClosure_original = os.path.join(self.workingDir, "makePlots_mcClosure_cfg.py")
    self.cfgFiles_make_plots_mcClosure_modified = []

    self.select_rle_output = select_rle_output
Exemplo n.º 20
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        ##print "self.dirs = ", self.dirs

        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")

            inputFileList = inputFileLists[sample_name]
            for jobId in inputFileList.keys():
                ##print "processing sample %s: jobId = %i" % (process_name, jobId)

                # build config files for executing analysis code
                key_dir = getKey(process_name)
                key_analyze_job = getKey(process_name, jobId)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                    print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                        key_file, ntupleFiles)
                    continue

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles' : ntupleFiles,
                  'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%i_cfg.py" % \
                     (self.channel, process_name, jobId)),
                  'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%i.root" % \
                     (process_name, jobId)),
                  'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%i.log" % \
                     (self.channel, process_name, jobId)),
                  'sample_category' : sample_category,
                  'hadTau_selection' : self.hadTau_selection,
                  'use_HIP_mitigation_mediumMuonId' : True,
                  'is_mc' : is_mc,
                  'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                  'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
                  'selectBDT' : True,
                  'changeBranchNames' : self.changeBranchNames
                }
                self.createCfg_analyze(
                    self.jobOptions_analyze[key_analyze_job])

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1 = getKey(process_name)
                if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                    self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                    self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s.root" % \
                  (self.channel, process_name))
                self.targets.append(
                    self.outputFile_hadd_stage1[key_hadd_stage1])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")

        return self.num_jobs
Exemplo n.º 21
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      process_name = sample_info["process_name_specific"]
      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          if lepton_frWeight == "disabled" and not lepton_selection in ["Tight"]:
            continue

          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          for lepton_charge_selection in self.lepton_charge_selections:

            if 'mcClosure' in lepton_selection and lepton_charge_selection != 'SS':
              # Run MC closure only for the region that complements the SR
              continue

            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_selection, lepton_charge_selection, sample_info
                ):
                  continue

                key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                  if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, lepton_charge_selection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, lepton_charge_selection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "addBackgroundLeptonFlips", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)                
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")    

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')
    for lepton_selection in self.lepton_selections:
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      hadTauVeto_selection = "Tight"
      hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])

      if lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for lepton_charge_selection in self.lepton_charge_selections:

          if 'mcClosure' in lepton_selection and lepton_charge_selection != 'SS':
            # Run MC closure only for the region that complements the SR
            continue

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue

            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_selection, lepton_charge_selection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_selection, lepton_charge_selection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift)

              for jobId in inputFileList.keys():

                analyze_job_tuple = (process_name, lepton_selection_and_frWeight, lepton_charge_selection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                syncOutput = ''
                syncTree = ''
                syncGenMatch = self.lepton_genMatches_nonfakes
                if self.do_sync:
                  mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                  if lepton_selection_and_frWeight == 'Tight':
                    if lepton_charge_selection == 'SS':
                      syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                      syncTree = 'syncTree_%s_SR' % self.channel
                    elif lepton_charge_selection == 'OS':
                      syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Flip.root' % (self.channel, central_or_shift))
                      syncTree = 'syncTree_%s_Flip' % self.channel
                    else:
                      continue
                  elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights' and lepton_charge_selection == 'SS':
                    syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                    syncTree = 'syncTree_%s_Fake' % self.channel
                  elif mcClosure_match and lepton_charge_selection == 'SS':
                    mcClosure_type = mcClosure_match.group('type')
                    syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                    syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel, mcClosure_type)
                  else:
                    continue
                if syncTree and central_or_shift != "central":
                  syncTree = os.path.join(central_or_shift, syncTree)
                syncRLE = ''
                if self.do_sync and self.rle_select:
                  syncRLE = self.rle_select % syncTree
                  if not os.path.isfile(syncRLE):
                    logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                    continue
                if syncOutput:
                  self.inputFiles_sync['sync'].append(syncOutput)

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'leptonChargeSelection'    : lepton_charge_selection,
                  'hadTauSelection_veto'     : hadTauVeto_selection,
                  'applyFakeRateWeights'     : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'syncOutput'               : syncOutput,
                  'syncTree'                 : syncTree,
                  'syncRLE'                  : syncRLE,
                  'useNonNominal'            : self.use_nonnominal,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'syncGenMatch'             : syncGenMatch,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection)
                hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, lepton_charge_selection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.do_sync: continue

            if is_mc:
              logging.info("Creating configuration files to run 'addBackgrounds' for sample %s" % process_name)

              sample_categories = [ sample_category ]
              for sample_category in sample_categories:
                # sum non-fake and fake contributions for each MC sample separately
                genMatch_categories = [ "nonfake", "Convs", "fake", "flip" ]
                for genMatch_category in genMatch_categories:
                  key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection)
                  key_addBackgrounds_dir = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection, "addBackgrounds")
                  addBackgrounds_job_tuple = None
                  processes_input = None
                  process_output = None
                  if genMatch_category == "nonfake":
                    # sum non-fake contributions for each MC sample separately
                    # input processes: TT2l0g0j; ...
                    # output processes: TT; ...
                    if sample_category in self.ttHProcs:
                      lepton_genMatches = []
                      lepton_genMatches.extend(self.lepton_genMatches_nonfakes)
                      lepton_genMatches.extend(self.lepton_genMatches_Convs)
                      processes_input = []
                      processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in lepton_genMatches ])
                      processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in lepton_genMatches ])
                      processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in lepton_genMatches ])
                      processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in lepton_genMatches ])
                      processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in lepton_genMatches ])
                    else:
                      processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_nonfakes ]
                    process_output = sample_category
                    addBackgrounds_job_tuple = (process_name, sample_category, lepton_selection_and_frWeight, lepton_charge_selection)
                  elif genMatch_category == "Convs":
                    # sum conversion background  contributions for each MC sample separately
                    # input processes: TT1l1g0j, TT0l2g0j; ...
                    # output processes: TT_Convs; ...
                    if sample_category in self.ttHProcs:
                      processes_input = []
                      processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_Convs ])
                      processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_Convs ])
                      processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_Convs ])
                      processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_Convs ])
                      processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_Convs ])
                    else:
                      processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_Convs ]
                    process_output = "%s_Convs" % sample_category
                    addBackgrounds_job_tuple = (process_name, "%s_Convs" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection)
                  elif genMatch_category == "fake":
                    # sum fake contributions for each MC sample separately
                    # input processes: TT1l0g1j, TT0l1g1j, TT0l0g2j; ...
                    # output processes: TT_fake; ...
                    if sample_category in self.ttHProcs:
                      processes_input = []
                      processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                      processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                      processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                      processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                      processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_fakes ])
                    else:
                      processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_fakes ]
                    process_output = "%s_fake" % sample_category
                    addBackgrounds_job_tuple = (process_name, "%s_fake" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection)
                  elif genMatch_category == "flip":
                    # sum flip contributions for each MC sample separately
                    # input processes:  TT2l2f0g0j&2t0e0m0j, TT2l1f0g0j&2t0e0m0j; ...
                    # output processes: TT_flip; ...
                    if sample_category in self.ttHProcs:
                      processes_input = []
                      processes_input.extend([ "%s%s" % ("ttH_htt", genMatch) for genMatch in self.lepton_genMatches_flips ])
                      processes_input.extend([ "%s%s" % ("ttH_hww", genMatch) for genMatch in self.lepton_genMatches_flips ])
                      processes_input.extend([ "%s%s" % ("ttH_hzz", genMatch) for genMatch in self.lepton_genMatches_flips ])
                      processes_input.extend([ "%s%s" % ("ttH_hzg", genMatch) for genMatch in self.lepton_genMatches_flips ])
                      processes_input.extend([ "%s%s" % ("ttH_hmm", genMatch) for genMatch in self.lepton_genMatches_flips ])
                    else:
                      processes_input = [ "%s%s" % (sample_category, genMatch) for genMatch in self.lepton_genMatches_flips ]
                    process_output = "%s_flip" % sample_category
                    addBackgrounds_job_tuple = (process_name, "%s_flip" % sample_category, lepton_selection_and_frWeight, lepton_charge_selection)
                  if processes_input:
                    logging.info(" ...for genMatch option = '%s'" % genMatch_category)
                    key_addBackgrounds_job = getKey(*addBackgrounds_job_tuple)
                    cfgFile_modified = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_%s_cfg.py" % addBackgrounds_job_tuple)
                    outputFile = os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s_%s.root" % addBackgrounds_job_tuple)
                    self.jobOptions_addBackgrounds[key_addBackgrounds_job] = {
                      'inputFile' : self.outputFile_hadd_stage1[key_hadd_stage1_job],
                      'cfgFile_modified' : cfgFile_modified,
                      'outputFile' : outputFile,
                      'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], os.path.basename(cfgFile_modified).replace("_cfg.py", ".log")),
                      'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ],
                      'processes_input' : processes_input,
                      'process_output' : process_output
                    }
                    self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds[key_addBackgrounds_job])

                    # initialize input and output file names for hadd_stage1_5
                    key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, lepton_charge_selection)
                    hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, lepton_charge_selection)
                    key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
                    if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                      self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
                    self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.jobOptions_addBackgrounds[key_addBackgrounds_job]['outputFile'])
                    self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

            # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
            if not is_mc:
              key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, lepton_charge_selection)
              key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection)
              if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
                self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])

          if self.do_sync: continue

          # sum fake background contributions for the total of all MC samples
          # input processes: TT1l0g1j, TT0l1g1j, TT0l0g2j; ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, lepton_charge_selection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          sample_categories.extend(self.ttHProcs)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_fake" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum flip background contributions for the total of all MC sample
          # input processes: TT2l1f0g0j,TT2l2f0g0j; ...
          # output process: flips_mc
          addBackgrounds_job_flips_tuple = ("flips_mc", lepton_selection_and_frWeight, lepton_charge_selection)
          key_addBackgrounds_job_flips = getKey(*addBackgrounds_job_flips_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          sample_categories.extend(self.ttHProcs)
          processes_input = []
          for sample_category in sample_categories:
            processes_input.append("%s_flip" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_flips_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_flips_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_flips_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ],
            'processes_input' : processes_input,
            'process_output' : "flips_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips])

          # sum conversion background contributions for the total of all MC samples
          # input processes: TT1l1g0j, TT0l2g0j; ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, lepton_charge_selection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          sample_categories.extend(self.ttHProcs)
          processes_input = []
          for sample_category in self.convs_backgrounds:
            processes_input.append("%s_Convs" % sample_category)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(lepton_selection, lepton_frWeight, lepton_charge_selection) ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, lepton_charge_selection)
          key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, lepton_charge_selection)
          hadd_stage2_job_tuple = (lepton_selection_and_frWeight, lepton_charge_selection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_flips]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    if self.do_sync:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      self.addToMakefile_syncNtuple(lines_makefile)
      outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
      self.outputFile_sync['sync'] = outputFile_sync_path
      self.addToMakefile_hadd_sync(lines_makefile)
      self.addToMakefile_validate(lines_makefile)
      self.targets.extend(self.phoniesToAdd)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for lepton_charge_selection in self.lepton_charge_selections:
      key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), lepton_charge_selection)
      key_addFakes_dir = getKey("addBackgroundLeptonFakes")
      addFakes_job_tuple = (lepton_charge_selection)
      key_addFakes_job = getKey("data_fakes", lepton_charge_selection)
      category_sideband = None
      if self.applyFakeRateWeights == "2lepton":
        category_sideband = "ttWctrl_%s_Fakeable_wFakeRateWeights" % lepton_charge_selection
      else:
        raise ValueError("Invalid Configuration parameter 'applyFakeRateWeights' = %s !!" % self.applyFakeRateWeights)
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % addFakes_job_tuple),
        'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % addFakes_job_tuple),
        'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % addFakes_job_tuple),
        'category_signal' : "ttWctrl_%s_Tight" % lepton_charge_selection,
        'category_sideband' : category_sideband
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), lepton_charge_selection)
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    #--------------------------------------------------------------------------
    # CV: add histograms in OS and SS regions,
    #     so that "data_fakes" background can be subtracted from OS control region used to estimate charge flip background
    key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    key_addFakes_job = getKey("data_fakes", "OS")
    key_hadd_stage1_6_dir = getKey("hadd", get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    key_hadd_stage1_6_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
    if key_hadd_stage1_6_job not in self.inputFiles_hadd_stage1_6:
        self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job] = []
    self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])
    self.inputFiles_hadd_stage1_6[key_hadd_stage1_6_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
    self.outputFile_hadd_stage1_6[key_hadd_stage1_6_job] = os.path.join(self.dirs[key_hadd_stage1_6_dir][DKEY_HIST], "hadd_stage1_6_Tight_OS.root")
    #--------------------------------------------------------------------------

    logging.info("Creating configuration files to run 'addBackgroundFlips'")
    key_addFlips_dir = getKey("addBackgroundLeptonFlips")
    key_addFlips_job = getKey("data_flips")
    self.jobOptions_addFlips[key_addFlips_job] = {
      'inputFile' : self.outputFile_hadd_stage1_6,
      'cfgFile_modified' : os.path.join(self.dirs[key_addFlips_dir][DKEY_CFGS], "addBackgroundLeptonFlips_cfg.py"),
      'outputFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_HIST], "addBackgroundLeptonFlips.root"),
      'logFile' : os.path.join(self.dirs[key_addFlips_dir][DKEY_LOGS], "addBackgroundLeptonFlips.log"),
      'category_signal' : "ttWctrl_SS_Tight",
      'category_sideband' : "ttWctrl_OS_Tight"
    }
    self.createCfg_addFlips(self.jobOptions_addFlips[key_addFlips_job])
    key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
    self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFlips[key_addFlips_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      key_prep_dcard_dir = getKey("prepareDatacards")
      prep_dcard_job_tuple = (self.channel, histogramToFit)
      key_prep_dcard_job = getKey(histogramToFit)      
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
        'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple),
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      key_add_syst_fakerate_dir = getKey("addSystFakeRates")
      add_syst_fakerate_job_tuple = (self.channel, histogramToFit)
      key_add_syst_fakerate_job = getKey(histogramToFit)
      self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
        'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
        'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
        'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s.root" % add_syst_fakerate_job_tuple),
        'category' : self.channel,
        'histogramToFit' : histogramToFit,
        'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
      }
      histogramDir_nominal = self.histogramDir_prep_dcard
      for lepton_type in [ 'e', 'm' ]:
        lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
        if lepton_mcClosure not in self.lepton_selections:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
        key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, 'SS')
        histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, 'SS')]
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
          'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
          'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
          'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
          'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
        })
      self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

      logging.info("Creating configuration files to run 'makePlots'")
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      key_makePlots_dir = getKey("makePlots")
      key_makePlots_job = getKey("SS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : "t#bar{t}W control region",
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])

    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
    self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
    self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
    self.sbatchFile_addFlips = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFlips_%s.py" % self.channel)
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFlips)
      self.createScript_sbatch(self.executable_addFlips, self.sbatchFile_addFlips, self.jobOptions_addFlips)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data_withFlips(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
Exemplo n.º 22
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")

            logging.info("Building dictionaries for sample %s..." %
                         process_name)
            for charge_selection in self.charge_selections:
                central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
                central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
                for central_or_shift_or_dummy in central_or_shifts_extended:
                    process_name_extended = [process_name, "hadd"]
                    for process_name_or_dummy in process_name_extended:
                        if central_or_shift_or_dummy in [
                                "hadd"
                        ] and process_name_or_dummy in ["hadd"]:
                            continue
                        if central_or_shift_or_dummy != "central" and central_or_shift_or_dummy not in central_or_shift_extensions:
                            if not is_mc:
                                continue
                            if not self.accept_central_or_shift(
                                    central_or_shift_or_dummy, sample_info):
                                continue

                        key_dir = getKey(process_name_or_dummy,
                                         charge_selection,
                                         central_or_shift_or_dummy)
                        for dir_type in [
                                DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES
                        ]:
                            initDict(self.dirs, [key_dir, dir_type])
                            if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                                self.dirs[key_dir][dir_type] = os.path.join(
                                    self.configDir, dir_type, self.channel,
                                    "_".join([charge_selection]),
                                    process_name_or_dummy,
                                    central_or_shift_or_dummy)
                            else:
                                self.dirs[key_dir][dir_type] = os.path.join(
                                    self.outputDir, dir_type, self.channel,
                                    "_".join([charge_selection]),
                                    process_name_or_dummy)
        for subdirectory in ["comp_jetToTauFakeRate", "makePlots"]:
            key_dir = getKey(subdirectory)
            for dir_type in [
                    DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT
            ]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, subdirectory)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, subdirectory)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)

        numDirectories = 0
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                numDirectories += len(self.dirs[key])
            else:
                numDirectories += 1
        logging.info("Creating directory structure (numDirectories = %i)" %
                     numDirectories)
        numDirectories_created = 0
        frac = 1
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
                numDirectories_created += len(self.dirs[key])
            else:
                create_if_not_exists(self.dirs[key])
                numDirectories_created = numDirectories_created + 1
            while 100 * numDirectories_created >= frac * numDirectories:
                logging.info(" %i%% completed" % frac)
                frac = frac + 1
        logging.info("Done.")

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue

            process_name = sample_info["process_name_specific"]
            inputFileList = inputFileLists[sample_name]

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            is_mc = (sample_info["type"] == "mc")
            sample_category = sample_info["sample_category"]

            for charge_selection in self.charge_selections:
                for central_or_shift in self.central_or_shifts:

                    if central_or_shift != "central" and not is_mc:
                        continue
                    if not self.accept_central_or_shift(
                            central_or_shift, sample_info):
                        continue

                    # build config files for executing analysis code
                    key_analyze_dir = getKey(process_name, charge_selection,
                                             central_or_shift)

                    for jobId in inputFileList.keys():

                        analyze_job_tuple = (process_name, charge_selection,
                                             central_or_shift, jobId)
                        key_analyze_job = getKey(*analyze_job_tuple)
                        ntupleFiles = inputFileList[jobId]
                        if len(ntupleFiles) == 0:
                            logging.warning(
                                "No input ntuples for %s --> skipping job !!" %
                                (key_analyze_job))
                            continue

                        cfgFile_modified_path = os.path.join(
                            self.dirs[key_analyze_dir][DKEY_CFGS],
                            "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                        logFile_path = os.path.join(
                            self.dirs[key_analyze_dir][DKEY_LOGS],
                            "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
                        histogramFile_path = os.path.join(
                            self.dirs[key_analyze_dir][DKEY_HIST],
                            "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)
                        rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                          if self.select_rle_output else ""

                        self.jobOptions_analyze[key_analyze_job] = {
                            'ntupleFiles': ntupleFiles,
                            'cfgFile_modified': cfgFile_modified_path,
                            'histogramFile': histogramFile_path,
                            'logFile': logFile_path,
                            'chargeSelection': charge_selection,
                            'jet_minPt': self.jet_minPt,
                            'jet_maxPt': self.jet_maxPt,
                            'jet_minAbsEta': self.jet_minAbsEta,
                            'jet_maxAbsEta': self.jet_maxAbsEta,
                            'hadTau_selection_tight':
                            self.hadTau_selection_tight,
                            'hadTauSelection_denominator':
                            self.hadTau_selection_denominator,
                            'hadTauSelections_numerator':
                            self.hadTau_selections_numerator,
                            'trigMatchingOptions': self.trigMatchingOptions,
                            'selEventsFileName_output': rleOutputFile_path,
                            'absEtaBins': self.absEtaBins,
                            'decayModes': self.decayModes,
                            'central_or_shift': central_or_shift,
                            'central_or_shifts_local': [],
                            'apply_hlt_filter': self.hlt_filter,
                        }
                        self.createCfg_analyze(
                            self.jobOptions_analyze[key_analyze_job],
                            sample_info)

                        # initialize input and output file names for hadd_stage1
                        key_hadd_stage1_dir = getKey(process_name,
                                                     charge_selection)
                        hadd_stage1_job_tuple = (process_name,
                                                 charge_selection)
                        key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                        if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[
                                key_hadd_stage1_job] = []
                        self.inputFiles_hadd_stage1[
                            key_hadd_stage1_job].append(
                                self.jobOptions_analyze[key_analyze_job]
                                ['histogramFile'])
                        self.outputFile_hadd_stage1[
                            key_hadd_stage1_job] = os.path.join(
                                self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                "hadd_stage1_%s_%s.root" %
                                hadd_stage1_job_tuple)

                # initialize input and output file names for hadd_stage2
                key_hadd_stage1_job = getKey(process_name, charge_selection)
                key_hadd_stage2_dir = getKey("hadd", charge_selection)
                key_hadd_stage2_job = getKey(charge_selection)
                if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
                self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(
                    self.outputFile_hadd_stage1[key_hadd_stage1_job])
                self.outputFile_hadd_stage2[
                    key_hadd_stage2_job] = os.path.join(
                        self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                        "hadd_stage2_%s.root" % charge_selection)

        logging.info(
            "Creating configuration files for executing 'comp_jetToTauFakeRate'"
        )
        for charge_selection in self.charge_selections:
            charge_key = "comp_%s" % charge_selection
            self.comp_input_files[charge_key] = []
            for trigMatchingOption in self.trigMatchingOptions:
                key_hadd_stage2_job = getKey(charge_selection)
                key_comp_jetToTauFakeRate_dir = getKey("comp_jetToTauFakeRate")
                key_comp_jetToTauFakeRate_job = getKey(charge_selection,
                                                       trigMatchingOption)
                self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job] = {
                        'inputFile':
                        self.outputFile_hadd_stage2[key_hadd_stage2_job],
                        'cfgFile_modified':
                        os.path.join(
                            self.dirs[DKEY_CFGS],
                            "comp_jetToTauFakeRate_%s_%s_cfg.py" %
                            (charge_selection, trigMatchingOption)),
                        'outputFile':
                        os.path.join(
                            self.dirs[DKEY_HIST],
                            "comp_jetToTauFakeRate_%s_%s.root" %
                            (charge_selection, trigMatchingOption)),
                        'logFile':
                        os.path.join(
                            self.dirs[DKEY_LOGS],
                            "comp_jetToTauFakeRate_%s_%s.log" %
                            (charge_selection, trigMatchingOption)),
                        'looseRegion':
                        "jetToTauFakeRate_%s_%s/denominator/" %
                        (charge_selection, trigMatchingOption),
                        'tightRegion':
                        "jetToTauFakeRate_%s_%s/numerator/" %
                        (charge_selection, trigMatchingOption),
                        'absEtaBins':
                        self.absEtaBins,
                        'ptBins':
                        self.ptBins,
                        'decayModes':
                        self.decayModes,
                        'hadTauSelections':
                        self.hadTau_selections_numerator,
                        'trigMatchingOption':
                        trigMatchingOption,
                        'plots_outputFileName':
                        os.path.join(
                            self.dirs[key_comp_jetToTauFakeRate_dir]
                            [DKEY_PLOT], "comp_jetToTauFakeRate_%s.png" %
                            trigMatchingOption)
                    }
                self.createCfg_comp_jetToTauFakeRate(
                    self.jobOptions_comp_jetToTauFakeRate[
                        key_comp_jetToTauFakeRate_job])
                comp_output = self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job]['outputFile']
                self.targets.append(comp_output)
                self.comp_input_files[charge_key].append(comp_output)
            self.comp_output_files[charge_key] = os.path.join(
                self.dirs[DKEY_HIST],
                "comp_jetToTauFakeRate_%s.root" % charge_selection)

        logging.info("Creating configuration files to run 'makePlots'")
        for charge_selection in self.charge_selections:
            key_hadd_stage2_job = getKey(charge_selection)
            key_makePlots_dir = getKey("makePlots")
            key_makePlots_job = getKey(charge_selection)
            self.jobOptions_make_plots[key_makePlots_job] = {
                'executable':
                self.executable_make_plots,
                'inputFile':
                self.outputFile_hadd_stage2[key_hadd_stage2_job],
                'cfgFile_modified':
                os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS],
                             "makePlots_%s_cfg.py" % self.channel),
                'outputFile':
                os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT],
                             "makePlots_%s.png" % self.channel),
                'histogramDir':
                "jetToTauFakeRate_%s" % charge_selection,
                'label':
                None,
                'make_plots_backgrounds':
                self.make_plots_backgrounds
            }
            self.createCfg_makePlots(
                self.jobOptions_make_plots[key_makePlots_job])
            for trigMatchingOption in self.trigMatchingOptions:
                self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
                for absEtaBin in ["absEtaLt1_5", "absEta1_5to9_9"]:
                    key_hadd_stage2_job = getKey(charge_selection)
                    key_makePlots_job = getKey(charge_selection,
                                               trigMatchingOption, absEtaBin,
                                               "denominator")
                    self.jobOptions_make_plots[key_makePlots_job] = {
                      'executable' : self.executable_make_plots,
                      'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                      'cfgFile_modified' : os.path.join(
                        self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_%s_denominator_%s_cfg.py" % \
                          (self.channel, charge_selection, trigMatchingOption, absEtaBin)),
                      'outputFile' : os.path.join(
                        self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s_%s_denominator_%s.png" % (self.channel, charge_selection, trigMatchingOption, absEtaBin)),
                      'histogramDir' : "jetToTauFakeRate_%s_%s/denominator/%s" % (charge_selection, trigMatchingOption, absEtaBin),
                      'label' : None,
                      'make_plots_backgrounds' : self.make_plots_backgrounds
                    }
                    self.createCfg_makePlots(
                        self.jobOptions_make_plots[key_makePlots_job])
                    for hadTau_selection_numerator in self.hadTau_selections_numerator:
                        key_hadd_stage2_job = getKey(charge_selection)
                        key_makePlots_job = getKey(charge_selection,
                                                   trigMatchingOption,
                                                   absEtaBin, "numerator",
                                                   hadTau_selection_numerator)
                        self.jobOptions_make_plots[key_makePlots_job] = {
                          'executable' : self.executable_make_plots,
                          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
                          'cfgFile_modified' : os.path.join(
                            self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_%s_%s_numerator_%s_%s_cfg.py" % \
                              (self.channel, charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin)),
                          'outputFile' : os.path.join(
                            self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_%s_%s_numerator_%s_%s.png" % \
                              (self.channel, charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin)),
                          'histogramDir' : "jetToTauFakeRate_%s_%s/numerator/%s/%s" % (charge_selection, trigMatchingOption, hadTau_selection_numerator, absEtaBin),
                          'label' : None,
                          'make_plots_backgrounds' : self.make_plots_backgrounds
                        }
                        self.createCfg_makePlots(
                            self.jobOptions_make_plots[key_makePlots_job])

        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.sbatchFile_comp_jetToTauFakeRate = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_comp_jetToTauFakeRate)
            self.createScript_sbatch(self.executable_comp_jetToTauFakeRate,
                                     self.sbatchFile_comp_jetToTauFakeRate,
                                     self.jobOptions_comp_jetToTauFakeRate)

        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile,
                                       make_dependency="phony_hadd_stage1",
                                       max_mem='4096M')
        self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
        self.addToMakefile_comp_hadd(lines_makefile)
        self.addToMakefile_make_plots(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done.")

        return self.num_jobs
Exemplo n.º 23
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      process_name = sample_info["process_name_specific"]
      sample_category = sample_info["sample_category"]
      is_mc = (sample_info["type"] == "mc")

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        for lepton_frWeight in self.lepton_frWeights:
          if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
            continue
          if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
            continue

          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)
          for chargeSumSelection in self.chargeSumSelections:
            central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
            central_or_shift_dedicated = self.central_or_shifts if self.runTHweights(sample_info) else self.central_or_shifts_external
            central_or_shifts_extended = central_or_shift_extensions + central_or_shift_dedicated
            for central_or_shift_or_dummy in central_or_shifts_extended:
              process_name_extended = [ process_name, "hadd" ]
              for process_name_or_dummy in process_name_extended:
                if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
                  continue

                if central_or_shift_or_dummy not in central_or_shift_extensions and not self.accept_systematics(
                    central_or_shift_or_dummy, is_mc, lepton_selection, chargeSumSelection, sample_info
                ):
                  continue

                key_dir = getKey(process_name_or_dummy, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift_or_dummy)
                for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
                  if dir_type == DKEY_SYNC and not self.do_sync:
                    continue
                  initDict(self.dirs, [ key_dir, dir_type ])
                  if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                    self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy, central_or_shift_or_dummy)
                  else:
                    self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                      "_".join([ lepton_selection_and_frWeight, chargeSumSelection ]), process_name_or_dummy)
    for subdirectory in [ "addBackgrounds", "addBackgroundLeptonFakes", "prepareDatacards", "addSystFakeRates", "makePlots" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    mcClosure_regex = re.compile('Fakeable_mcClosure_(?P<type>m|e)_wFakeRateWeights')
    for lepton_selection in self.lepton_selections:
      electron_selection = lepton_selection
      muon_selection = lepton_selection

      hadTauVeto_selection = "Tight"
      hadTauVeto_selection = "|".join([ hadTauVeto_selection, self.hadTauVeto_selection_part2 ])

      if lepton_selection == "forBDTtraining":
        electron_selection = "Loose"
        muon_selection = "Loose"
      elif lepton_selection == "Fakeable_mcClosure_e":
        electron_selection = "Fakeable"
        muon_selection = "Tight"
      elif lepton_selection == "Fakeable_mcClosure_m":
        electron_selection = "Tight"
        muon_selection = "Fakeable"

      for lepton_frWeight in self.lepton_frWeights:
        if lepton_frWeight == "enabled" and not lepton_selection.startswith("Fakeable"):
          continue
        if lepton_frWeight == "disabled" and not lepton_selection in [ "Tight", "forBDTtraining" ]:
          continue
        lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_selection, lepton_frWeight)

        for chargeSumSelection in self.chargeSumSelections:

          for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
              continue
            process_name = sample_info["process_name_specific"]
            logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            use_th_weights = self.runTHweights(sample_info)

            central_or_shift_dedicated = self.central_or_shifts if use_th_weights else self.central_or_shifts_external
            for central_or_shift in central_or_shift_dedicated:
              if not self.accept_systematics(
                  central_or_shift, is_mc, lepton_selection, chargeSumSelection, sample_info
              ):
                continue

              central_or_shifts_local = []
              if central_or_shift == "central" and not use_th_weights:
                for central_or_shift_local in self.central_or_shifts_internal:
                  if self.accept_systematics(
                      central_or_shift_local, is_mc, lepton_selection, chargeSumSelection, sample_info
                  ):
                    central_or_shifts_local.append(central_or_shift_local)

              logging.info(" ... for '%s' and systematic uncertainty option '%s'" % (lepton_selection_and_frWeight, central_or_shift))

              # build config files for executing analysis code
              key_analyze_dir = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift)

              for jobId in inputFileList.keys():

                analyze_job_tuple = (process_name, lepton_selection_and_frWeight, chargeSumSelection, central_or_shift, jobId)
                key_analyze_job = getKey(*analyze_job_tuple)
                ntupleFiles = inputFileList[jobId]
                if len(ntupleFiles) == 0:
                  logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
                  continue

                syncOutput = ''
                syncTree = ''
                if self.do_sync:
                  if chargeSumSelection != 'OS':
                    continue
                  mcClosure_match = mcClosure_regex.match(lepton_selection_and_frWeight)
                  if lepton_selection_and_frWeight == 'Tight':
                    syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_SR.root' % (self.channel, central_or_shift))
                    syncTree   = 'syncTree_%s_SR' % self.channel.replace('_', '')
                  elif lepton_selection_and_frWeight == 'Fakeable_wFakeRateWeights':
                    syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_Fake.root' % (self.channel, central_or_shift))
                    syncTree   = 'syncTree_%s_Fake' % self.channel.replace('_', '')
                  elif mcClosure_match:
                    mcClosure_type = mcClosure_match.group('type')
                    syncOutput = os.path.join(self.dirs[key_analyze_dir][DKEY_SYNC], '%s_%s_mcClosure_%s.root' % (self.channel, central_or_shift, mcClosure_type))
                    syncTree = 'syncTree_%s_mcClosure_%s' % (self.channel.replace('_', ''), mcClosure_type)
                  else:
                    continue
                if syncTree and central_or_shift != "central":
                  syncTree = os.path.join(central_or_shift, syncTree)
                syncRLE = ''
                if self.do_sync and self.rle_select:
                  syncRLE = self.rle_select % syncTree
                  if not os.path.isfile(syncRLE):
                    logging.warning("Input RLE file for the sync is missing: %s; skipping the job" % syncRLE)
                    continue
                if syncOutput:
                  self.inputFiles_sync['sync'].append(syncOutput)

                cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
                logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % analyze_job_tuple)
                rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                     if self.select_rle_output else ""
                histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%s_%i.root" % analyze_job_tuple)
                branchName_memOutput = '%s_%s' % (self.MEMbranch, self.get_addMEM_systematics(central_or_shift)) \
                                       if self.MEMbranch else ''

                self.jobOptions_analyze[key_analyze_job] = {
                  'ntupleFiles'              : ntupleFiles,
                  'cfgFile_modified'         : cfgFile_modified_path,
                  'histogramFile'            : histogramFile_path,
                  'logFile'                  : logFile_path,
                  'selEventsFileName_output' : rleOutputFile_path,
                  'electronSelection'        : electron_selection,
                  'muonSelection'            : muon_selection,
                  'apply_leptonGenMatching'  : self.apply_leptonGenMatching,
                  'hadTauSelection'          : hadTauVeto_selection,
                  'chargeSumSelection'       : chargeSumSelection,
                  'applyFakeRateWeights'     : self.applyFakeRateWeights if not lepton_selection == "Tight" else "disabled",
                  'central_or_shift'         : central_or_shift,
                  'central_or_shifts_local'  : central_or_shifts_local,
                  'selectBDT'                : self.isBDTtraining,
                  'branchName_memOutput'     : branchName_memOutput,
                  'syncOutput'               : syncOutput,
                  'syncTree'                 : syncTree,
                  'syncRLE'                  : syncRLE,
                  'apply_hlt_filter'         : self.hlt_filter,
                  'useNonNominal'            : self.use_nonnominal,
                  'fillGenEvtHistograms'     : True,
                  'isControlRegion'          : self.isControlRegion,
                }
                self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info, lepton_selection)

                # initialize input and output file names for hadd_stage1
                key_hadd_stage1_dir = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection)
                hadd_stage1_job_tuple = (process_name, lepton_selection_and_frWeight, chargeSumSelection)
                key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
                if not key_hadd_stage1_job in self.inputFiles_hadd_stage1:
                  self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
                self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
                self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                                "hadd_stage1_%s_%s_%s.root" % hadd_stage1_job_tuple)

            if self.isBDTtraining or self.do_sync:
              continue

            # add output files of hadd_stage1 for data to list of input files for hadd_stage1_5
            key_hadd_stage1_job = getKey(process_name, lepton_selection_and_frWeight, chargeSumSelection)
            key_hadd_stage1_5_dir = getKey("hadd", lepton_selection_and_frWeight, chargeSumSelection)
            hadd_stage1_5_job_tuple = (lepton_selection_and_frWeight, chargeSumSelection)
            key_hadd_stage1_5_job = getKey(*hadd_stage1_5_job_tuple)
            if not key_hadd_stage1_5_job in self.inputFiles_hadd_stage1_5:
              self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job] = []
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
            self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job] = os.path.join(self.dirs[key_hadd_stage1_5_dir][DKEY_HIST],
                                                                        "hadd_stage1_5_%s_%s.root" % hadd_stage1_5_job_tuple)

          if self.isBDTtraining or self.do_sync:
            continue

          ## doing list of processes to make the hadd in _Convs and _fake
          ## we could remove the tH ones with althernative couplings
          sample_categories = []
          sample_categories.extend(self.nonfake_backgrounds)
          sample_categories.extend(self.ttHProcs)
          processes_input_base = self.get_processes_input_base(sample_categories)

          # sum fake background contributions for the total of all MC sample
          # input processes: TT_fake, TTW_fake, TTWW_fake, ...
          # output process: fakes_mc
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_dir = getKey("addBackgrounds")
          addBackgrounds_job_fakes_tuple = ("fakes_mc", lepton_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_job_fakes = getKey(*addBackgrounds_job_fakes_tuple)
          processes_input = []
          for process_input_base in processes_input_base:
            if "HH" in process_input_base:
              continue
            processes_input.append("%s_fake" % process_input_base)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_fakes_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_fakes_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_fakes_tuple),
            'categories' : [ getHistogramDir(self.channel, lepton_selection, lepton_frWeight, chargeSumSelection) ],
            'processes_input' : processes_input,
            'process_output' : "fakes_mc"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes])

          # sum conversion background contributions for the total of all MC sample
          # input processes: TT_Convs, TTW_Convs, TTWW_Convs, ...
          # output process: Convs
          addBackgrounds_job_Convs_tuple = ("Convs", lepton_selection_and_frWeight, chargeSumSelection)
          key_addBackgrounds_job_Convs = getKey(*addBackgrounds_job_Convs_tuple)
          processes_input = []
          for process_input_base in self.convs_backgrounds:
            if "HH" in process_input_base:
              continue
            processes_input.append("%s_Convs" % process_input_base)
          self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs] = {
            'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
            'cfgFile_modified' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_CFGS], "addBackgrounds_%s_%s_%s_cfg.py" % addBackgrounds_job_Convs_tuple),
            'outputFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_HIST], "addBackgrounds_%s_%s_%s.root" % addBackgrounds_job_Convs_tuple),
            'logFile' : os.path.join(self.dirs[key_addBackgrounds_dir][DKEY_LOGS], "addBackgrounds_%s_%s_%s.log" % addBackgrounds_job_Convs_tuple),
            'categories' : [ getHistogramDir(self.channel, lepton_selection, lepton_frWeight, chargeSumSelection) ],
            'processes_input' : processes_input,
            'process_output' : "Convs"
          }
          self.createCfg_addBackgrounds(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs])

          # initialize input and output file names for hadd_stage2
          key_hadd_stage1_5_job = getKey(lepton_selection_and_frWeight, chargeSumSelection)
          key_hadd_stage2_dir = getKey("hadd", lepton_selection_and_frWeight, chargeSumSelection)
          hadd_stage2_job_tuple = (lepton_selection_and_frWeight, chargeSumSelection)
          key_hadd_stage2_job = getKey(*hadd_stage2_job_tuple)
          if not key_hadd_stage2_job in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
          if lepton_selection == "Tight":
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'])
            self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_Convs]['outputFile'])
          self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job])
          self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                          "hadd_stage2_%s_%s.root" % hadd_stage2_job_tuple)

    if self.isBDTtraining or self.do_sync:
      if self.is_sbatch:
        logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        if self.isBDTtraining:
          self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
        elif self.do_sync:
          self.createScript_sbatch_syncNtuple(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating Makefile")
      lines_makefile = []
      if self.isBDTtraining:
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
      elif self.do_sync:
        self.addToMakefile_syncNtuple(lines_makefile)
        outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC, '%s.root' % self.channel)
        self.outputFile_sync['sync'] = outputFile_sync_path
        self.addToMakefile_hadd_sync(lines_makefile)
      else:
        raise ValueError("Internal logic error")
      self.addToMakefile_validate(lines_makefile)
      self.targets.extend(self.phoniesToAdd)
      self.createMakefile(lines_makefile)
      logging.info("Done.")
      return self.num_jobs

    logging.info("Creating configuration files to run 'addBackgroundFakes'")
    for chargeSumSelection in self.chargeSumSelections:
      key_hadd_stage1_5_job = getKey(get_lepton_selection_and_frWeight("Fakeable", "enabled"), chargeSumSelection)
      key_addFakes_dir = getKey("addBackgroundLeptonFakes")
      key_addFakes_job = getKey("data_fakes", chargeSumSelection)
      category_sideband = "{}_{}_Fakeable_wFakeRateWeights".format(self.channel, chargeSumSelection)
      self.jobOptions_addFakes[key_addFakes_job] = {
        'inputFile' : self.outputFile_hadd_stage1_5[key_hadd_stage1_5_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_addFakes_dir][DKEY_CFGS], "addBackgroundLeptonFakes_%s_cfg.py" % chargeSumSelection),
        'outputFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_HIST], "addBackgroundLeptonFakes_%s.root" % chargeSumSelection),
        'logFile' : os.path.join(self.dirs[key_addFakes_dir][DKEY_LOGS], "addBackgroundLeptonFakes_%s.log" % chargeSumSelection),
        'category_signal' : "{}_{}_Tight".format(self.channel, chargeSumSelection),
        'category_sideband' : category_sideband
      }
      self.createCfg_addFakes(self.jobOptions_addFakes[key_addFakes_job])
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), chargeSumSelection)
      self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.jobOptions_addFakes[key_addFakes_job]['outputFile'])

    logging.info("Creating configuration files to run 'prepareDatacards'")
    for histogramToFit in self.histograms_to_fit:
      key_prep_dcard_dir = getKey("prepareDatacards")
      if "OS" in self.chargeSumSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
        prep_dcard_job_tuple = (self.channel, "OS", histogramToFit)
        key_prep_dcard_job = getKey("OS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard,
          'histogramToFit' : histogramToFit,
          'label' : None
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      if "SS" in self.chargeSumSelections:
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
        prep_dcard_job_tuple = (self.channel, "SS", histogramToFit)
        key_prep_dcard_job = getKey("SS", histogramToFit)
        self.jobOptions_prep_dcard[key_prep_dcard_job] = {
          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
          'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_%s_cfg.py" % prep_dcard_job_tuple),
          'datacardFile' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s_%s.root" % prep_dcard_job_tuple),
          'histogramDir' : self.histogramDir_prep_dcard_SS,
          'histogramToFit' : histogramToFit,
          'label' : 'SS'
        }
        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      # add shape templates for the following systematic uncertainties:
      #  - 'CMS_ttHl_Clos_norm_e'
      #  - 'CMS_ttHl_Clos_shape_e'
      #  - 'CMS_ttHl_Clos_norm_m'
      #  - 'CMS_ttHl_Clos_shape_m'
      for chargeSumSelection in self.chargeSumSelections:
        key_prep_dcard_job = getKey(chargeSumSelection, histogramToFit)
        key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), chargeSumSelection)
        key_add_syst_fakerate_dir = getKey("addSystFakeRates")
        add_syst_fakerate_job_tuple = (self.channel, chargeSumSelection, histogramToFit)
        key_add_syst_fakerate_job = getKey(chargeSumSelection, histogramToFit)
        self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job] = {
          'inputFile' : self.jobOptions_prep_dcard[key_prep_dcard_job]['datacardFile'],
          'cfgFile_modified' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_CFGS], "addSystFakeRates_%s_%s_%s_cfg.py" % add_syst_fakerate_job_tuple),
          'outputFile' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_DCRD], "addSystFakeRates_%s_%s_%s.root" % add_syst_fakerate_job_tuple),
          'category' : self.channel,
          'histogramToFit' : histogramToFit,
          'plots_outputFileName' : os.path.join(self.dirs[key_add_syst_fakerate_dir][DKEY_PLOT], "addSystFakeRates.png")
        }
        histogramDir_nominal = None
        if chargeSumSelection == "OS":
          histogramDir_nominal = self.histogramDir_prep_dcard
        elif chargeSumSelection == "SS":
          histogramDir_nominal = self.histogramDir_prep_dcard_SS
        else:
          raise ValueError("Invalid parameter 'chargeSumSelection' = %s !!" % chargeSumSelection)
        for lepton_type in [ 'e', 'm' ]:
          lepton_mcClosure = "Fakeable_mcClosure_%s" % lepton_type
          if lepton_mcClosure not in self.lepton_selections:
            continue
          lepton_selection_and_frWeight = get_lepton_selection_and_frWeight(lepton_mcClosure, "enabled")
          key_addBackgrounds_job_fakes = getKey("fakes_mc", lepton_selection_and_frWeight, chargeSumSelection)
          histogramDir_mcClosure = self.mcClosure_dir['%s_%s' % (lepton_mcClosure, chargeSumSelection)]
          self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job].update({
            'add_Clos_%s' % lepton_type : ("Fakeable_mcClosure_%s" % lepton_type) in self.lepton_selections,
            'inputFile_nominal_%s' % lepton_type : self.outputFile_hadd_stage2[key_hadd_stage2_job],
            'histogramName_nominal_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_nominal, histogramToFit),
            'inputFile_mcClosure_%s' % lepton_type : self.jobOptions_addBackgrounds_sum[key_addBackgrounds_job_fakes]['outputFile'],
            'histogramName_mcClosure_%s' % lepton_type : "%s/sel/evt/fakes_mc/%s" % (histogramDir_mcClosure, histogramToFit)
          })
        self.createCfg_add_syst_fakerate(self.jobOptions_add_syst_fakerate[key_add_syst_fakerate_job])

    logging.info("Creating configuration files to run 'makePlots'")
    key_makePlots_dir = getKey("makePlots")
    if "OS" in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard,
        'label' : self.channel,
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "SS" in self.chargeSumSelections:
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "SS")
      key_makePlots_job = getKey("SS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_%s_SS_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_%s_SS.png" % self.channel),
        'histogramDir' : self.histogramDir_prep_dcard_SS,
        'label' : "{} SS".format(self.channel),
        'make_plots_backgrounds' : self.make_plots_backgrounds
      }
      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
    if "Fakeable_mcClosure" in self.lepton_selections: #TODO
      key_hadd_stage2_job = getKey(get_lepton_selection_and_frWeight("Tight", "disabled"), "OS")
      key_makePlots_job = getKey("OS")
      self.jobOptions_make_plots[key_makePlots_job] = {
        'executable' : self.executable_make_plots_mcClosure,
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_makePlots_dir][DKEY_CFGS], "makePlots_mcClosure_%s_cfg.py" % self.channel),
        'outputFile' : os.path.join(self.dirs[key_makePlots_dir][DKEY_PLOT], "makePlots_mcClosure_%s.png" % self.channel)
      }
      self.createCfg_makePlots_mcClosure(self.jobOptions_make_plots[key_makePlots_job])

    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    self.sbatchFile_addBackgrounds = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_%s.py" % self.channel)
    self.sbatchFile_addBackgrounds_sum = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addBackgrounds_sum_%s.py" % self.channel)
    self.sbatchFile_addFakes = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_addFakes_%s.py" % self.channel)
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addBackgrounds)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds, self.jobOptions_addBackgrounds)
      self.createScript_sbatch(self.executable_addBackgrounds, self.sbatchFile_addBackgrounds_sum, self.jobOptions_addBackgrounds_sum)
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_addFakes)
      self.createScript_sbatch(self.executable_addFakes, self.sbatchFile_addFakes, self.jobOptions_addFakes)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_backgrounds_from_data(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_add_syst_fakerate(lines_makefile)
    self.addToMakefile_make_plots(lines_makefile)
    self.addToMakefile_validate(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
Exemplo n.º 24
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, "",
                        process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, "",
                        process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_DCRD, DKEY_PLOT,
                DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HADD_RT]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        ##print "self.dirs = ", self.dirs

        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            sample_category = sample_info["sample_category"]
            is_mc = (sample_info["type"] == "mc")
            is_signal = (sample_category == "signal")

            for central_or_shift in self.central_or_shifts:

                inputFileList = inputFileLists[sample_name]
                for jobId in inputFileList.keys():
                    if central_or_shift != "central" and not is_mc:
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttH"
                    ) and sample_category != "signal":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttW"
                    ) and sample_category != "TTW":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttZ"
                    ) and sample_category != "TTZ":
                        continue

                    # build config files for executing analysis code
                    key_dir = getKey(process_name)
                    key_analyze_job = getKey(process_name, central_or_shift,
                                             jobId)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                            key_job, ntupleFiles)
                        continue
                    self.jobOptions_analyze[key_analyze_job] = {
                      'ntupleFiles' : ntupleFiles,
                      'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % \
                         (self.channel, process_name, central_or_shift, jobId)),
                      'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%i.root" % \
                         (process_name, central_or_shift, jobId)),
                      'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % \
                         (self.channel, process_name, central_or_shift, jobId)),
                      'rleOutputFile' : os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % \
                         (self.channel, process_name, central_or_shift, jobId)) if self.select_rle_output else "",
                      'sample_category' : sample_category,
                      'triggers' : sample_info["triggers"],
                      'hadTau_selection' : self.hadTau_selection_part2,
                      ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
                      'use_HIP_mitigation_mediumMuonId' : True,
                      'is_mc' : is_mc,
                      'central_or_shift' : central_or_shift,
                      'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                      'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                      'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job])

                    # initialize input and output file names for hadd_stage1
                    key_hadd_stage1 = getKey(process_name)
                    if not key_hadd_stage1 in self.inputFiles_hadd_stage1.keys(
                    ):
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                        self.jobOptions_analyze[key_analyze_job]
                        ['histogramFile'])
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s.root" % \
                      (self.channel, process_name))

            # initialize input and output file names for hadd_stage2
            key_hadd_stage1 = getKey(process_name)
            key_hadd_stage2 = getKey("all")
            if not key_hadd_stage2 in self.inputFiles_hadd_stage2.keys():
                self.inputFiles_hadd_stage2[key_hadd_stage2] = []
            self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                self.outputFile_hadd_stage1[key_hadd_stage1])
            self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % \
              (self.channel))

        logging.info("Creating configuration files to run 'prepareDatacards'")
        for evtSelection in self.evtSelections:
            for histogramToFit in self.histograms_to_fit:
                key_prep_dcard_job = getKey(evtSelection, histogramToFit)
                key_hadd_stage2 = getKey("all")
                self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "prepareDatacards_%s_%s_%s_cfg.py" %
                        (self.channel, evtSelection, histogramToFit)),
                    'datacardFile':
                    os.path.join(
                        self.dirs[DKEY_DCRD],
                        "prepareDatacards_%s_%s_%s.root" %
                        (self.channel, evtSelection, histogramToFit)),
                    'histogramDir':
                    "_".join([self.histogramDir_prep_dcard, evtSelection]),
                    'histogramToFit':
                    histogramToFit,
                    'label':
                    None
                }
                self.createCfg_prep_dcard(
                    self.jobOptions_prep_dcard[key_prep_dcard_job])

        logging.info("Creating configuration files to run 'makePlots'")
        for evtSelection in self.evtSelections:
            key_makePlots_job = getKey(evtSelection)
            key_hadd_stage2 = getKey("all")
            self.jobOptions_make_plots[key_makePlots_job] = {
                'executable':
                self.executable_make_plots,
                'inputFile':
                self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified':
                os.path.join(
                    self.dirs[DKEY_CFGS],
                    "makePlots_%s_%s_cfg.py" % (self.channel, evtSelection)),
                'outputFile':
                os.path.join(
                    self.dirs[DKEY_PLOT],
                    "makePlots_%s_%s.png" % (self.channel, evtSelection)),
                'histogramDir':
                "_".join([self.histogramDir_prep_dcard, evtSelection]),
                'label':
                evtSelection,
                'make_plots_backgrounds':
                self.make_plots_backgrounds
            }
            self.createCfg_makePlots(
                self.jobOptions_make_plots[key_makePlots_job])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch()

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.addToMakefile_prep_dcard(lines_makefile)
        self.addToMakefile_make_plots(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
Exemplo n.º 25
0
    def __init__(self,
            treeName,
            outputDir,
            cfgDir,
            executable_addMEM,
            samples,
            era,
            check_output_files,
            running_method,
            max_files_per_job,
            mem_integrations_per_job,
            max_mem_integrations,
            num_parallel_jobs,
            leptonSelection,
            hadTauSelection,
            integration_choice,
            jet_cleaning_by_index,
            dry_run,
            use_nonnominal,
            use_home,
            channel,
            rle_filter_file = '',
            submission_cmd = None,
            pool_id = '',
            max_jobs_per_sample = -1,
          ):

        self.treeName = treeName
        self.outputDir = outputDir
        self.cfgDir = cfgDir
        self.executable_addMEM = executable_addMEM
        self.mem_integrations_per_job = mem_integrations_per_job
        self.max_files_per_job = max_files_per_job
        self.max_mem_integrations = max_mem_integrations
        self.max_jobs_per_sample = max_jobs_per_sample
        self.samples = samples
        self.era = era
        self.check_output_files = check_output_files
        self.channel = channel
        self.rle_filter_file = rle_filter_file
        self.leptonSelection = leptonSelection
        self.hadTauSelection = hadTauSelection
        if self.hadTauSelection:
            self.hadTauDefinition = self.hadTauSelection.split('|')[0]
            self.hadTauWorkingPoint = self.hadTauSelection.split('|')[1]
        else:
            self.hadTauDefinition = None
            self.hadTauWorkingPoint = None
        self.maxPermutations_branchName = None
        self.integration_choice = integration_choice
        self.jet_cleaning_by_index = jet_cleaning_by_index
        logging.info(
            "Number of integration points: %s" % self.integration_choice
        )
        if running_method.lower() not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)
        self.running_method = running_method
        self.is_sbatch = False
        self.is_makefile = False
        if self.running_method.lower() == "sbatch":
            self.is_sbatch = True
        else:
            self.is_makefile = True
        self.makefile = os.path.join(
          self.cfgDir, "Makefile_%s" % self.channel)
        self.num_parallel_jobs = num_parallel_jobs
        self.dry_run = dry_run
        self.use_nonnominal = use_nonnominal
        self.use_home = use_home
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: {workingDir}".format(workingDir = self.workingDir))

        for dirPath in [self.outputDir, self.cfgDir]:
          create_if_not_exists(dirPath)

        self.stdout_file_path = os.path.join(self.cfgDir, "stdout_%s.log" % self.channel)
        self.stderr_file_path = os.path.join(self.cfgDir, "stderr_%s.log" % self.channel)
        self.sw_ver_file_cfg  = os.path.join(self.cfgDir, "VERSION_%s.log" % self.channel)
        self.sw_ver_file_out  = os.path.join(self.outputDir, "VERSION_%s.log" % self.channel)
        self.submission_out   = os.path.join(self.cfgDir, "SUBMISSION_%s.log" % self.channel)
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out = get_log_version((
            self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out
        ))
        check_submission_cmd(self.submission_out, submission_cmd)

        self.dirs = {}
        self.samples = samples
        self.cfgFiles_addMEM_modified = {}
        self.shFiles_addMEM_modified = {}
        self.logFiles_addMEM = {}
        self.sbatchFile_addMEM = os.path.join(self.cfgDir, "sbatch_addMEM_%s.py" % self.channel)
        self.inputFiles = {}
        self.outputFiles = {}
        self.hadd_records = {}
        self.filesToClean = []

        del self.samples['sum_events']
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [DKEY_NTUPLES, DKEY_FINAL_NTUPLES]:
                initDict(self.dirs, [key_dir, dir_type])
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, process_name)
            for dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_HADD, DKEY_HADD_RT]:
                initDict(self.dirs, [key_dir, dir_type])
                self.dirs[key_dir][dir_type] = os.path.join(self.cfgDir, dir_type, self.channel, process_name)

        self.cvmfs_error_log = {}
Exemplo n.º 26
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in [
                    DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC
            ]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        for apply_jetSmearing in self.apply_jetSmearing_options:
            jetSmearingLabel = None
            if apply_jetSmearing:
                jetSmearingLabel = "jetSmearingEnabled"
            else:
                jetSmearingLabel = "jetSmearingDisabled"
            for apply_metSmearing in self.apply_metSmearing_options:
                metSmearingLabel = None
                if apply_metSmearing:
                    metSmearingLabel = "metSmearingEnabled"
                else:
                    metSmearingLabel = "metSmearingDisabled"
                for sample_name, sample_info in self.samples.items():
                    if not sample_info["use_it"]:
                        continue
                    process_name = sample_info["process_name_specific"]
                    isSignal = True if process_name.find(
                        "signal") != -1 else False
                    logging.info(
                        "Creating configuration files to run '%s' for sample %s"
                        % (self.executable_analyze, process_name))
                    sample_category = sample_info["sample_category"]

                    inputFileList = inputFileLists[sample_name]
                    numJobsPerFile = None
                    if sample_info[
                            "process_name_specific"] == "signal_ggf_nonresonant_node_sm_hh_2b2v":
                        numJobsPerFile = 500
                    elif sample_info[
                            "process_name_specific"] == "signal_ggf_nonresonant_cHHH1_hh_2b2v":
                        numJobsPerFile = 100
                    elif sample_info[
                            "process_name_specific"] == "TTJets_DiLept":
                        numJobsPerFile = 50
                    elif sample_info[
                            "process_name_specific"] == "TTJets_DiLept_ext1":
                        numJobsPerFile = 50
                    elif sample_info["process_name_specific"] == "TTTo2L2Nu":
                        numJobsPerFile = 10
                    else:
                        raise ValueError("Invalid sample: %s" %
                                         sample_info["process_name_specific"])
                    numJobs = numJobsPerFile * len(inputFileList.keys())
                    for jobId in range(1, numJobs + 1):

                        ntupleId = ((jobId - 1) / numJobsPerFile) + 1
                        maxSelEvents = 500
                        skipSelEvents = maxSelEvents * (
                            (jobId - 1) % numJobsPerFile)

                        # build config files for executing analysis code
                        key_dir = getKey(process_name)
                        key_analyze_job = getKey(process_name,
                                                 jetSmearingLabel,
                                                 metSmearingLabel, jobId)
                        ntupleFiles = inputFileList[ntupleId]
                        if len(ntupleFiles) == 0:
                            logging.warning(
                                "No input ntuples for %s --> skipping job !!" %
                                (key_analyze_job))
                            continue

                        cfgFile_modified_path = os.path.join(
                            self.dirs[key_dir][DKEY_CFGS],
                            "analyze_%s_%s_%s_%s_%i_cfg.py" %
                            (self.channel, process_name, jetSmearingLabel,
                             metSmearingLabel, jobId))
                        histogramFile_path = os.path.join(
                            self.dirs[key_dir][DKEY_HIST],
                            "analyze_%s_%s_%s_%s_%i.root" %
                            (self.channel, process_name, jetSmearingLabel,
                             metSmearingLabel, jobId))
                        logFile_path = os.path.join(
                            self.dirs[key_dir][DKEY_LOGS],
                            "analyze_%s_%s_%s_%s_%i.log" %
                            (self.channel, process_name, jetSmearingLabel,
                             metSmearingLabel, jobId))
                        rleOutputFile_path = os.path.join(self.dirs[key_dir][DKEY_RLES], "rle_%s_%s_%s_%s_%i.txt" % (self.channel, process_name, jetSmearingLabel, metSmearingLabel, jobId)) \
                                             if self.select_rle_output else ""
                        self.jobOptions_analyze[key_analyze_job] = {
                            'ntupleFiles': ntupleFiles,
                            'cfgFile_modified': cfgFile_modified_path,
                            'histogramFile': histogramFile_path,
                            'logFile': logFile_path,
                            'selEventsFileName_output': rleOutputFile_path,
                            'apply_jetSmearing': apply_jetSmearing,
                            'apply_metSmearing': apply_metSmearing,
                            'maxSelEvents': maxSelEvents,
                            'skipSelEvents': skipSelEvents
                        }
                        self.createCfg_analyze(
                            self.jobOptions_analyze[key_analyze_job],
                            sample_info)

                        # initialize input and output file names for hadd_stage1
                        key_hadd_stage1 = getKey(process_name,
                                                 jetSmearingLabel,
                                                 metSmearingLabel)
                        if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                        self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                            self.jobOptions_analyze[key_analyze_job]
                            ['histogramFile'])
                        self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s_%s.root" % \
                          (self.channel, process_name, jetSmearingLabel, metSmearingLabel))

                    # add output files of hadd_stage1 to list of input files for hadd_stage2
                    key_hadd_stage1 = getKey(process_name, jetSmearingLabel,
                                             metSmearingLabel)
                    key_hadd_stage2 = getKey("")
                    if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
                        self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                    self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                        self.outputFile_hadd_stage1[key_hadd_stage1])
                    self.outputFile_hadd_stage2[
                        key_hadd_stage2] = os.path.join(
                            self.dirs[DKEY_HIST],
                            "histograms_harvested_stage2_%s.root" %
                            self.channel)

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)

        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.targets.extend(self.outputFile_hadd_stage2.values())
        self.createMakefile(lines_makefile)

        logging.info("Done")

        return self.num_jobs
Exemplo n.º 27
0
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            for charge_selection in self.charge_selections:
                key_dir = getKey(process_name, charge_selection)
                for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                    initDict(self.dirs, [key_dir, dir_type])
                    if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                        self.dirs[key_dir][dir_type] = os.path.join(
                            self.configDir, dir_type, self.channel,
                            "_".join([charge_selection]), process_name)
                    else:
                        self.dirs[key_dir][dir_type] = os.path.join(
                            self.outputDir, dir_type, self.channel,
                            "_".join([charge_selection]), process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT,
                    DKEY_HADD_RT
            ]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        ##print "self.dirs = ", self.dirs

        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue

            process_name = sample_info["process_name_specific"]

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            is_mc = (sample_info["type"] == "mc")
            lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info[
                "xsection"] * self.lumi / sample_info["nof_events"]
            apply_genWeight = sample_info["apply_genWeight"] if (
                is_mc and "apply_genWeight" in sample_info.keys()) else False
            sample_category = sample_info["sample_category"]
            triggers = sample_info["triggers"]
            apply_trigger_bits = (
                is_mc and
                (self.era == "2015" or
                 (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

            for charge_selection in self.charge_selections:
                for central_or_shift in self.central_or_shifts:

                    inputFileList = inputFileLists[sample_name]
                    for jobId in inputFileList.keys():
                        if central_or_shift != "central" and not is_mc:
                            continue
                        if central_or_shift.startswith(
                                "CMS_ttHl_thu_shape_ttH"
                        ) and sample_category != "signal":
                            continue
                        if central_or_shift.startswith(
                                "CMS_ttHl_thu_shape_ttW"
                        ) and sample_category != "TTW":
                            continue
                        if central_or_shift.startswith(
                                "CMS_ttHl_thu_shape_ttZ"
                        ) and sample_category != "TTZ":
                            continue

                        # build config files for executing analysis code
                        key_dir = getKey(process_name, charge_selection)
                        key_analyze_job = getKey(process_name,
                                                 charge_selection,
                                                 central_or_shift, jobId)

                        ntupleFiles = inputFileList[jobId]
                        if len(ntupleFiles) == 0:
                            print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                                key_job, ntupleFiles)
                            continue
                        self.jobOptions_analyze[key_analyze_job] = {
                          'ntupleFiles' : ntupleFiles,
                          'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                            (self.channel, process_name, charge_selection, central_or_shift, jobId)),
                          'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                            (process_name, charge_selection, central_or_shift, jobId)),
                          'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                            (self.channel, process_name, charge_selection, central_or_shift, jobId)),
                          'sample_category' : sample_category,
                          'triggers' : sample_info["triggers"],
                          'charge_selection' : charge_selection,
                          'jet_minPt' : self.jet_minPt,
                          'jet_maxPt' : self.jet_maxPt,
                          'jet_minAbsEta' : self.jet_minAbsEta,
                          'jet_maxAbsEta' : self.jet_maxAbsEta,
                          'hadTau_selection_denominator' : self.hadTau_selection_denominator,
                          'hadTau_selections_numerator' : self.hadTau_selections_numerator,
                          'absEtaBins' : self.absEtaBins,
                          ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
                          'use_HIP_mitigation_mediumMuonId' : True,
                          'is_mc' : is_mc,
                          'central_or_shift' : central_or_shift,
                          'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                          'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                          'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                        }
                        self.createCfg_analyze(
                            self.jobOptions_analyze[key_analyze_job])

                        # initialize input and output file names for hadd_stage1
                        key_hadd_stage1 = getKey(process_name,
                                                 charge_selection)
                        if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                            self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                        self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                            self.jobOptions_analyze[key_analyze_job]
                            ['histogramFile'])
                        self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                          (self.channel, process_name, charge_selection))

                # initialize input and output file names for hadd_stage2
                key_hadd_stage1 = getKey(process_name, charge_selection)
                key_hadd_stage2 = getKey(charge_selection)
                if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
                    self.inputFiles_hadd_stage2[key_hadd_stage2] = []
                self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                    self.outputFile_hadd_stage1[key_hadd_stage1])
                self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s.root" % \
                  (self.channel, charge_selection))

        logging.info(
            "Creating configuration files for executing 'comp_jetToTauFakeRate'"
        )
        for charge_selection in self.charge_selections:
            key_comp_jetToTauFakeRate_job = getKey(charge_selection)
            key_hadd_stage2 = getKey(charge_selection)
            self.jobOptions_comp_jetToTauFakeRate[
                key_comp_jetToTauFakeRate_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection),
                    'outputFile':
                    os.path.join(
                        self.dirs[DKEY_HIST],
                        "comp_jetToTauFakeRate_%s.root" % charge_selection),
                    'logFile':
                    os.path.join(
                        self.dirs[DKEY_LOGS],
                        "comp_jetToTauFakeRate_%s.log" % charge_selection),
                    'looseRegion':
                    "jetToTauFakeRate_%s/denominator/" % charge_selection,
                    'tightRegion':
                    "jetToTauFakeRate_%s/numerator/" % charge_selection,
                    'absEtaBins':
                    self.absEtaBins,
                    'ptBins':
                    self.ptBins
                }
            self.createCfg_comp_jetToTauFakeRate(
                self.jobOptions_comp_jetToTauFakeRate[
                    key_comp_jetToTauFakeRate_job])
            self.targets.append(self.jobOptions_comp_jetToTauFakeRate[
                key_comp_jetToTauFakeRate_job]['outputFile'])

        logging.info("Creating configuration files to run 'makePlots'")
        for charge_selection in self.charge_selections:
            key_makePlots_job = getKey(charge_selection)
            key_hadd_stage2 = getKey(charge_selection)
            self.jobOptions_make_plots[key_makePlots_job] = {
                'executable':
                self.executable_make_plots,
                'inputFile':
                self.outputFile_hadd_stage2[key_hadd_stage2],
                'cfgFile_modified':
                os.path.join(self.dirs[DKEY_CFGS],
                             "makePlots_%s_cfg.py" % self.channel),
                'outputFile':
                os.path.join(self.dirs[DKEY_PLOT],
                             "makePlots_%s.png" % self.channel),
                'histogramDir':
                "jetToTauFakeRate_%s" % charge_selection,
                'label':
                None,
                'make_plots_backgrounds': ["TT", "TTW", "TTZ", "EWK", "Rares"],
            }
            self.createCfg_makePlots(
                self.jobOptions_make_plots[key_makePlots_job])
            self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
            for absEtaBin in ["absEtaLt1_5", "absEta1_5to9_9"]:
                key_makePlots_job = getKey(charge_selection, absEtaBin,
                                           "denominator")
                key_hadd_stage2 = getKey(charge_selection)
                self.jobOptions_make_plots[key_makePlots_job] = {
                    'executable':
                    self.executable_make_plots,
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "makePlots_%s_%s_denominator_%s_cfg.py" %
                        (self.channel, charge_selection, absEtaBin)),
                    'outputFile':
                    os.path.join(
                        self.dirs[DKEY_PLOT],
                        "makePlots_%s_%s_denominator_%s.png" %
                        (self.channel, charge_selection, absEtaBin)),
                    'histogramDir':
                    "jetToTauFakeRate_%s/denominator/%s" %
                    (charge_selection, absEtaBin),
                    'label':
                    None,
                    'make_plots_backgrounds':
                    ["TT", "TTW", "TTZ", "EWK", "Rares"],
                }
                self.createCfg_makePlots(
                    self.jobOptions_make_plots[key_makePlots_job])
                for hadTau_selection_numerator in self.hadTau_selections_numerator:
                    key_makePlots_job = getKey(charge_selection, absEtaBin,
                                               "numerator",
                                               hadTau_selection_numerator)
                    key_hadd_stage2 = getKey(charge_selection)
                    self.jobOptions_make_plots[key_makePlots_job] = {
                        'executable':
                        self.executable_make_plots,
                        'inputFile':
                        self.outputFile_hadd_stage2[key_hadd_stage2],
                        'cfgFile_modified':
                        os.path.join(
                            self.dirs[DKEY_CFGS],
                            "makePlots_%s_%s_numerator_%s_%s_cfg.py" %
                            (self.channel, charge_selection,
                             hadTau_selection_numerator, absEtaBin)),
                        'outputFile':
                        os.path.join(
                            self.dirs[DKEY_PLOT],
                            "makePlots_%s_%s_numerator_%s_%s.png" %
                            (self.channel, charge_selection,
                             hadTau_selection_numerator, absEtaBin)),
                        'histogramDir':
                        "jetToTauFakeRate_%s/numerator/%s/%s" %
                        (charge_selection, hadTau_selection_numerator,
                         absEtaBin),
                        'label':
                        None,
                        'make_plots_backgrounds':
                        ["TT", "TTW", "TTZ", "EWK", "Rares"],
                    }
                    self.createCfg_makePlots(
                        self.jobOptions_make_plots[key_makePlots_job])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_comp_jetToTauFakeRate)
            self.sbatchFile_comp_jetToTauFakeRate = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
            self.createScript_sbatch(self.executable_comp_jetToTauFakeRate,
                                     self.sbatchFile_comp_jetToTauFakeRate,
                                     self.jobOptions_comp_jetToTauFakeRate)

        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)
        self.addToMakefile_make_plots(lines_makefile)
        self.createMakefile(lines_makefile)

        logging.info("Done")
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      process_name = sample_info["process_name_specific"]
      for mode in self.modes:
        key_dir = getKey(process_name, mode)
        for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES, DKEY_SYNC ]:
          if dir_type == DKEY_SYNC and not self.do_sync:
            continue
          initDict(self.dirs, [ key_dir, dir_type ])
          if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
            self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, "_".join([ mode ]), process_name)
          else:
            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, "_".join([ mode ]), process_name)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_SYNC ]:
      if dir_type == DKEY_SYNC and not self.do_sync:
        continue
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)
    ##print "self.dirs = ", self.dirs

    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
      else:
        create_if_not_exists(self.dirs[key])

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for mode in self.modes:
      for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
          continue
        process_name = sample_info["process_name_specific"]
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))

        sample_category = sample_info["sample_category"]
        is_mc = (sample_info["type"] == "mc")

        for central_or_shift in self.central_or_shifts:

          inputFileList = inputFileLists[sample_name]
          for jobId in inputFileList.keys():

            # build config files for executing analysis code
            key_dir = getKey(process_name, mode)
            key_analyze_job = getKey(process_name, mode, central_or_shift, jobId)
            ntupleFiles = inputFileList[jobId]
            if len(ntupleFiles) == 0:
              logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
              continue

            self.jobOptions_analyze[key_analyze_job] = {
              'ntupleFiles' : ntupleFiles,
              'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                (self.channel, process_name, mode, central_or_shift, jobId)),
              'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                (process_name, mode, central_or_shift, jobId)),
              'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                (self.channel, process_name, mode, central_or_shift, jobId)),
              'sample_category' : sample_category,
              'mode' : mode,
              'lepton_selection' : self.lepton_selection,
              'hadTau_selection' : self.hadTau_selection,
              'SVfit4tau_logM_wMassConstraint_MarkovChain' : self.SVfit4tau_logM_wMassConstraint_MarkovChain,
              'SVfit4tau_logM_woMassConstraint_MarkovChain' : self.SVfit4tau_logM_woMassConstraint_MarkovChain,
              'SVfit4tau_logM_wMassConstraint_VAMP' : self.SVfit4tau_logM_wMassConstraint_VAMP,
              'use_HIP_mitigation_mediumMuonId' : False,
              'is_mc' : is_mc,
              'central_or_shift' : central_or_shift,
              'lumi_scale' : 1.,
              'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info) else False,
            }
            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job])

            # initialize input and output file names for hadd_stage1
            key_hadd_stage1 = getKey(process_name, mode)
            if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
              self.inputFiles_hadd_stage1[key_hadd_stage1] = []
            self.inputFiles_hadd_stage1[key_hadd_stage1].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
            self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
              (self.channel, process_name, mode))

            # initialize input and output file names for hadd_stage2
            key_hadd_stage2 = getKey()
            if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
              self.inputFiles_hadd_stage2[key_hadd_stage2] = []
            self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])
            self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s.root" % \
              (self.channel))

    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done")

    return self.num_jobs
Exemplo n.º 29
0
    def create(self):
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            central_or_shifts_extended = [""]
            central_or_shifts_extended.extend(self.central_or_shifts)
            central_or_shifts_extended.extend(
                ["hadd", "copyHistograms", "addBackgrounds"])
            for central_or_shift_or_dummy in central_or_shifts_extended:
                process_name_extended = [process_name, "hadd"]
                for process_name_or_dummy in process_name_extended:
                    if process_name_or_dummy in [
                            "hadd"
                    ] and central_or_shift_or_dummy != "":
                        continue
                    if central_or_shift_or_dummy in [
                            "hadd", "copyHistograms", "addBackgrounds"
                    ] and process_name_or_dummy in ["hadd"]:
                        continue
                    key_dir = getKey(process_name_or_dummy,
                                     central_or_shift_or_dummy)
                    for dir_type in [
                            DKEY_CFGS, DKEY_LOGS, DKEY_RLES, DKEY_SYNC
                    ]:
                        initDict(self.dirs, [key_dir, dir_type])
                        if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                            self.dirs[key_dir][dir_type] = os.path.join(
                                self.configDir, dir_type, self.channel,
                                process_name_or_dummy,
                                central_or_shift_or_dummy)
                        else:
                            self.dirs[key_dir][dir_type] = os.path.join(
                                self.outputDir, dir_type, self.channel,
                                process_name_or_dummy,
                                central_or_shift_or_dummy)

        for subdirectory in [
                "addBackgrounds", "addBackgroundLeptonFakes",
                "prepareDatacards", "addSystFakeRates", "makePlots"
        ]:
            key_dir = getKey(subdirectory)
            for dir_type in [
                    DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_ROOT, DKEY_DCRD,
                    DKEY_PLOT
            ]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, subdirectory)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, subdirectory)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_SYNC, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HADD_RT]:
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)

        numDirectories = 0
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                numDirectories += len(self.dirs[key])
            else:
                numDirectories += 1
        logging.info("Creating directory structure (numDirectories = %i)" %
                     numDirectories)
        numDirectories_created = 0
        frac = 1
        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
                numDirectories_created += len(self.dirs[key])
            else:
                create_if_not_exists(self.dirs[key])
                numDirectories_created = numDirectories_created + 1
            while 100 * numDirectories_created >= frac * numDirectories:
                logging.info(" %i%% completed" % frac)
                frac = frac + 1
        logging.info("Done.")

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_info, self.max_files_per_job)

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"]:
                continue
            process_name = sample_info["process_name_specific"]
            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))
            inputFileList = inputFileLists[sample_name]

            for jobId in inputFileList.keys():
                for central_or_shift in self.central_or_shifts:
                    logging.info(" ... for systematic uncertainty %s" %
                                 central_or_shift)

                    key_analyze_dir = getKey(process_name, central_or_shift)
                    analyze_job_tuple = (process_name, central_or_shift, jobId)
                    key_analyze_job = getKey(*analyze_job_tuple)
                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        print("Warning: no ntupleFiles --> skipping job !!")
                        continue

                    syncOutput = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_SYNC],
                        '%s_%s.root' % (self.channel, central_or_shift))
                    syncOutputTree = self.output_tree if central_or_shift == "central" else os.path.join(
                        central_or_shift, self.output_tree)
                    self.inputFiles_sync['sync'].append(syncOutput)

                    cfgFile_modified_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_CFGS],
                        "analyze_%s_%s_%i_cfg.py" % analyze_job_tuple)
                    logFile_path = os.path.join(
                        self.dirs[key_analyze_dir][DKEY_LOGS],
                        "analyze_%s_%s_%i.log" % analyze_job_tuple)

                    self.jobOptions_analyze[key_analyze_job] = {
                        'ntupleFiles':
                        ntupleFiles,
                        'cfgFile_modified':
                        cfgFile_modified_path,
                        'histogramFile':
                        '',
                        'logFile':
                        logFile_path,
                        'syncTree':
                        syncOutputTree,
                        'syncOutput':
                        syncOutput,
                        'syncRLE':
                        self.rle_select if self.rle_select
                        and '%s' not in self.rle_select else '',
                        'useNonNominal':
                        self.use_nonnominal,
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job], sample_info)

        logging.info(
            "Creating script for submitting '%s' jobs to batch system" %
            self.executable_analyze)
        self.sbatchFile_analyze = os.path.join(
            self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
        self.createScript_sbatch_syncNtuple(self.executable_analyze,
                                            self.sbatchFile_analyze,
                                            self.jobOptions_analyze)
        logging.info("Creating Makefile")
        lines_makefile = []
        self.addToMakefile_syncNtuple(lines_makefile)
        outputFile_sync_path = os.path.join(self.outputDir, DKEY_SYNC,
                                            '%s.root' % self.channel)
        self.outputFile_sync['sync'] = outputFile_sync_path
        self.addToMakefile_hadd_sync(lines_makefile)
        self.targets.extend(self.phoniesToAdd)
        self.createMakefile(lines_makefile)
        logging.info("Done")
        return self.num_jobs
    def create(self):
        """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            #      for charge_selection in self.charge_selections:                                                          ## NO CHARGE SELECTION NEEDED HERE
            #        key_dir = getKey(process_name, charge_selection)                                                       ## NO CHARGE SELECTION NEEDED HERE
            key_dir = getKey(process_name)
            for dir_type in [DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS, DKEY_LOGS]:
                    #            self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                    #              "_".join([ charge_selection ]), process_name)                                                    ## NO CHARGE SELECTION NEEDED HERE
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    #            self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                    #              "_".join([ charge_selection ]), process_name)                                                    ## NO CHARGE SELECTION NEEDED HERE
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        for dir_type in [
                DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_HIST, DKEY_DCRD,
                DKEY_PLOT, DKEY_HADD_RT
        ]:
            initDict(self.dirs, [dir_type])
            if dir_type in [
                    DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_HADD_RT
            ]:  ## DKEY_PLOT TO BE ADDED LATER
                self.dirs[dir_type] = os.path.join(self.configDir, dir_type,
                                                   self.channel)
            else:
                self.dirs[dir_type] = os.path.join(self.outputDir, dir_type,
                                                   self.channel)
        print "self.dirs = ", self.dirs

        for key in self.dirs.keys():
            if type(self.dirs[key]) == dict:
                for dir_type in self.dirs[key].keys():
                    create_if_not_exists(self.dirs[key][dir_type])
            else:
                create_if_not_exists(self.dirs[key])

        inputFileLists = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            logging.info("Checking input files for sample %s" %
                         sample_info["process_name_specific"])
            inputFileLists[sample_name] = generateInputFileList(
                sample_name, sample_info, self.max_files_per_job, self.debug)

        self.inputFileIds = {}
        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue

            process_name = sample_info["process_name_specific"]

            logging.info(
                "Creating configuration files to run '%s' for sample %s" %
                (self.executable_analyze, process_name))

            is_mc = (sample_info["type"] == "mc")
            lumi_scale = 1. if not (self.use_lumi and is_mc) else sample_info[
                "xsection"] * self.lumi / sample_info["nof_events"]
            apply_genWeight = sample_info["apply_genWeight"] if (
                is_mc and "apply_genWeight" in sample_info.keys()) else False
            sample_category = sample_info["sample_category"]
            triggers = sample_info["triggers"]
            apply_trigger_bits = (
                is_mc and
                (self.era == "2015" or
                 (self.era == "2016" and sample_info["reHLT"]))) or not is_mc

            #      for charge_selection in self.charge_selections:                                        ## NO CHARGE SELECTION NEEDED HERE
            for central_or_shift in self.central_or_shifts:
                inputFileList = inputFileLists[sample_name]
                for jobId in inputFileList.keys():
                    if central_or_shift != "central" and not is_mc:
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttH"
                    ) and sample_category != "signal":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttW"
                    ) and sample_category != "TTW":
                        continue
                    if central_or_shift.startswith(
                            "CMS_ttHl_thu_shape_ttZ"
                    ) and sample_category != "TTZ":
                        continue
                    # build config files for executing analysis code
#            key_dir = getKey(process_name, charge_selection)                                  ## NO CHARGE SELECTION NEEDED HERE
                    key_dir = getKey(process_name)
                    #            key_analyze_job = getKey(process_name, charge_selection, central_or_shift, jobId) ## NO CHARGE SELECTION NEEDED HERE
                    key_analyze_job = getKey(process_name, central_or_shift,
                                             jobId)

                    ntupleFiles = inputFileList[jobId]
                    if len(ntupleFiles) == 0:
                        print "Warning: ntupleFiles['%s'] = %s --> skipping job !!" % (
                            key_job, ntupleFiles)
                        continue
                    self.jobOptions_analyze[key_analyze_job] = {
                      'ntupleFiles' : ntupleFiles,
                    #              'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%s_%i_cfg.py" % \
                    #                (self.channel, process_name, charge_selection, central_or_shift, jobId)),                          ## NO CHARGE SELECTION NEEDED HERE
                    #              'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%s_%i.root" % \
                    #                (process_name, charge_selection, central_or_shift, jobId)),                                        ## NO CHARGE SELECTION NEEDED HERE
                    #              'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%s_%i.log" % \
                    #                (self.channel, process_name, charge_selection, central_or_shift, jobId)),                          ## NO CHARGE SELECTION NEEDED HERE
                      'cfgFile_modified' : os.path.join(self.dirs[key_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % \
                        (self.channel, process_name, central_or_shift, jobId)),
                      'histogramFile' : os.path.join(self.dirs[key_dir][DKEY_HIST], "%s_%s_%i.root" % \
                        (process_name,  central_or_shift, jobId)),
                      'logFile' : os.path.join(self.dirs[key_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % \
                        (self.channel, process_name, central_or_shift, jobId)),
                      'sample_category' : sample_category,
                      'triggers' : sample_info["triggers"],
                    #              'charge_selection' : charge_selection,                                         ## NO CHARGE SELECTION NEEDED HERE
                    #              'jet_minPt' : self.jet_minPt,
                    #              'jet_maxPt' : self.jet_maxPt,
                    #              'jet_minAbsEta' : self.jet_minAbsEta,
                    #              'jet_maxAbsEta' : self.jet_maxAbsEta,
                    #              'hadTau_selections' : self.hadTau_selections,
                      'absEtaBins_e' : self.absEtaBins_e,
                      'absEtaBins_mu' : self.absEtaBins_mu,
                      'absPtBins_e' : self.absPtBins_e,
                      'absPtBins_mu' : self.absPtBins_mu,
                      ##'use_HIP_mitigation_mediumMuonId' : sample_info["use_HIP_mitigation_mediumMuonId"],
                      'use_HIP_mitigation_mediumMuonId' : True,
                      'is_mc' : is_mc,
                      'central_or_shift' : central_or_shift,
                      'lumi_scale' : 1. if not (self.use_lumi and is_mc) else sample_info["xsection"] * self.lumi / sample_info["nof_events"],
                      'apply_genWeight' : sample_info["genWeight"] if (is_mc and "genWeight" in sample_info.keys()) else False,
                      'apply_trigger_bits' : (is_mc and (self.era == "2015" or (self.era == "2016" and sample_info["reHLT"]))) or not is_mc,
                    }
                    self.createCfg_analyze(
                        self.jobOptions_analyze[key_analyze_job])

                    # initialize input and output file names for hadd_stage1
                    #            key_hadd_stage1 = getKey(process_name, charge_selection)
                    key_hadd_stage1 = getKey(
                        process_name)  ## NO CHARGE SELECTION NEEDED HERE
                    if not key_hadd_stage1 in self.inputFiles_hadd_stage1:
                        self.inputFiles_hadd_stage1[key_hadd_stage1] = []
                    self.inputFiles_hadd_stage1[key_hadd_stage1].append(
                        self.jobOptions_analyze[key_analyze_job]
                        ['histogramFile'])
                    #            self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s_%s.root" % \
                    #              (self.channel, process_name, charge_selection))                                                                                 ## NO CHARGE SELECTION NEEDED HERE
                    self.outputFile_hadd_stage1[key_hadd_stage1] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage1_%s_%s.root" % \
                      (self.channel, process_name))

        # initialize input and output file names for hadd_stage1_5
        key_hadd_stage1_5 = getKey('')
        if not key_hadd_stage1_5 in self.inputFiles_hadd_stage1_5:
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5] = []
        for key_hadd_stage1 in self.outputFile_hadd_stage1.keys():
            self.inputFiles_hadd_stage1_5[key_hadd_stage1_5].append(
                self.outputFile_hadd_stage1[key_hadd_stage1])
        self.outputFile_hadd_stage1_5[key_hadd_stage1_5] = os.path.join(
            self.dirs[DKEY_HIST], "histograms_harvested_stage1_5.root")

        ## Creating configuration files to run 'addBackgrounds_LeptonFakeRate' [stage 1.5]
        key_addBackgrounds_job = getKey('')
        self.jobOptions_addBackgrounds_LeptonFakeRate[
            key_addBackgrounds_job] = {
                'inputFile':
                self.outputFile_hadd_stage1_5[key_hadd_stage1_5],
                'cfgFile_modified':
                os.path.join(
                    self.dirs[DKEY_CFGS],
                    os.path.basename(
                        self.cfgFile_addBackgrounds_LeptonFakeRate)),
                'outputFile':
                os.path.join(self.dirs[DKEY_HIST],
                             "addBackground_LeptonFakeRate.root"),
                'logFile':
                os.path.join(
                    self.dirs[DKEY_LOGS],
                    os.path.basename(
                        self.cfgFile_addBackgrounds_LeptonFakeRate.replace(
                            "_cfg.py", ".log"))),
            }
        self.createCfg_addBackgrounds_LeptonFakeRate(
            self.
            jobOptions_addBackgrounds_LeptonFakeRate[key_addBackgrounds_job])

        # initialize input and output file names for hadd_stage2
        #        key_hadd_stage2 = getKey(charge_selection)                                                                                            ## NO CHARGE SELECTION NEEDED HERE
        #        if not key_hadd_stage2 in self.inputFiles_hadd_stage2:                                                                                ## NO CHARGE SELECTION NEEDED HERE
        #          self.inputFiles_hadd_stage2[key_hadd_stage2] = []                                                                                   ## NO CHARGE SELECTION NEEDED HERE
        #        self.inputFiles_hadd_stage2[key_hadd_stage2].append(self.outputFile_hadd_stage1[key_hadd_stage1])                                     ## NO CHARGE SELECTION NEEDED HERE
        #        self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(self.dirs[DKEY_HIST], "histograms_harvested_stage2_%s_%s.root" % \        ## NO CHARGE SELECTION NEEDED HERE
        #          (self.channel, charge_selection))                                                                                                   ## NO CHARGE SELECTION NEEDED HERE
        key_hadd_stage2 = getKey('')
        if not key_hadd_stage2 in self.inputFiles_hadd_stage2:
            self.inputFiles_hadd_stage2[key_hadd_stage2] = []
        for key_hadd_stage1_5 in self.outputFile_hadd_stage1_5.keys():
            self.inputFiles_hadd_stage2[key_hadd_stage2].append(
                self.outputFile_hadd_stage1_5[key_hadd_stage1_5])
        self.inputFiles_hadd_stage2[key_hadd_stage2].append(
            self.jobOptions_addBackgrounds_LeptonFakeRate[
                key_addBackgrounds_job]['outputFile'])
        self.outputFile_hadd_stage2[key_hadd_stage2] = os.path.join(
            self.dirs[DKEY_HIST], "histograms_harvested_stage2.root")

        if self.prep_dcard:
            processesToCopy = []
            signals = []
            logging.info(
                "Creating configuration files to run 'prepareDatacards_LeptonFakeRate'"
            )
            for process in self.prep_dcard_signals:
                signals.append(process)
            self.prep_dcard_signals = signals
            for process in self.prep_dcard_processesToCopy:
                processesToCopy.append(process)
            self.prep_dcard_processesToCopy = processesToCopy
            for histogramToFit in self.histograms_to_fit:
                key_prep_dcard_job = getKey(histogramToFit)
                self.jobOptions_prep_dcard[key_prep_dcard_job] = {
                    'inputFile':
                    self.outputFile_hadd_stage2[key_hadd_stage2],
                    'cfgFile_modified':
                    os.path.join(
                        self.dirs[DKEY_CFGS],
                        "prepareDatacards_LeptonFakeRate_%s_cfg.py" %
                        (histogramToFit)),
                    'datacardFile':
                    os.path.join(self.dirs[DKEY_DCRD],
                                 "prepareDatacards_%s.root" %
                                 (histogramToFit)),
                    'histogramDir': (self.histogramDir_prep_dcard),
                    'histogramToFit':
                    histogramToFit,
                    'label':
                    None
                }
                #        self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])       ## DEF LINE
                self.createCfg_prep_dcard_LeptonFakeRate(
                    self.jobOptions_prep_dcard[key_prep_dcard_job])

        if self.is_sbatch:
            logging.info(
                "Creating script for submitting '%s' jobs to batch system" %
                self.executable_analyze)
            self.sbatchFile_analyze = os.path.join(
                self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
            #      self.createScript_sbatch()

            self.createScript_sbatch_analyze(self.executable_analyze,
                                             self.sbatchFile_analyze,
                                             self.jobOptions_analyze)
            self.sbatchFile_addBackgrounds_LeptonFakeRate = os.path.join(
                self.dirs[DKEY_SCRIPTS],
                "sbatch_addBackgrounds_LeptonFakeRate_%s.py" % self.channel)
            self.createScript_sbatch(
                self.executable_addBackgrounds_LeptonFakeRate,
                self.sbatchFile_addBackgrounds_LeptonFakeRate,
                self.jobOptions_addBackgrounds_LeptonFakeRate)


#      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_comp_jetToTauFakeRate)
#      self.sbatchFile_comp_jetToTauFakeRate = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_comp_jetToTauFakeRate.py")
#      self.createScript_sbatch(self.executable_comp_jetToTauFakeRate, self.sbatchFile_comp_jetToTauFakeRate, self.jobOptions_comp_jetToTauFakeRate)

#### FAKE RATE COMP BLOCK COMMENTED OUT ########################

#    logging.info("Creating configuration files for executing 'comp_jetToTauFakeRate'")
#    for charge_selection in self.charge_selections:
#      key_comp_jetToTauFakeRate_job = getKey(charge_selection)
#      key_hadd_stage2 = getKey(charge_selection)
#      self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job] = {
#        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#        'cfgFile_modified' : os.path.join(
#          self.dirs[DKEY_CFGS], "comp_jetToTauFakeRate_%s_cfg.py" % charge_selection),
#        'outputFile' : os.path.join(
#          self.dirs[DKEY_HIST], "comp_jetToTauFakeRate_%s.root" % charge_selection),
#        'looseRegion' : "jetToTauFakeRate_%s/denominator/" % charge_selection,
#        'tightRegion' : "jetToTauFakeRate_%s/numerator/" % charge_selection,
#        'absEtaBins' : self.absEtaBins,
#        'ptBins' : self.ptBins
#      }
#      self.createCfg_comp_jetToTauFakeRate(self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job])
#      self.targets.append(self.jobOptions_comp_jetToTauFakeRate[key_comp_jetToTauFakeRate_job]['outputFile'])

#    logging.info("Creating configuration files to run 'makePlots'")
#    for charge_selection in self.charge_selections:
#      key_makePlots_job = getKey(charge_selection)
#      key_hadd_stage2 = getKey(charge_selection)
#      self.jobOptions_make_plots[key_makePlots_job] = {
#        'executable' : self.executable_make_plots,
#        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#        'cfgFile_modified' : os.path.join(
#          self.dirs[DKEY_CFGS], "makePlots_%s_cfg.py" % self.channel),
#        'outputFile' : os.path.join(
#          self.dirs[DKEY_PLOT], "makePlots_%s.png" % self.channel),
#        'histogramDir' : "jetToTauFakeRate_%s" % charge_selection,
#        'label' : None,
#        'make_plots_backgrounds' : [ "TT", "TTW", "TTZ", "EWK", "Rares" ],
#      }
#      self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
#      self.cfgFile_make_plots = self.cfgFile_make_plots_denominator
#      for absEtaBin in [ "absEtaLt1_5", "absEta1_5to9_9" ]:
#        key_makePlots_job = getKey(charge_selection, absEtaBin, "denominator")
#        key_hadd_stage2 = getKey(charge_selection)
#        self.jobOptions_make_plots[key_makePlots_job] = {
#          'executable' : self.executable_make_plots,
#          'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#          'cfgFile_modified' : os.path.join(
#            self.dirs[DKEY_CFGS], "makePlots_%s_%s_denominator_%s_cfg.py" % (self.channel, charge_selection, absEtaBin)),
#          'outputFile' : os.path.join(
#            self.dirs[DKEY_PLOT], "makePlots_%s_%s_denominator_%s.png" % (self.channel, charge_selection, absEtaBin)),
#          'histogramDir' : "jetToTauFakeRate_%s/denominator/%s" % (charge_selection, absEtaBin),
#          'label' : None,
#          'make_plots_backgrounds' : [ "TT", "TTW", "TTZ", "EWK", "Rares" ],
#        }
#        self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
#        for hadTau_selection in self.hadTau_selections:
#          key_makePlots_job = getKey(charge_selection, absEtaBin, "numerator", hadTau_selection)
#          key_hadd_stage2 = getKey(charge_selection)
#          self.jobOptions_make_plots[key_makePlots_job] = {
#            'executable' : self.executable_make_plots,
#            'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2],
#            'cfgFile_modified' : os.path.join(
#              self.dirs[DKEY_CFGS], "makePlots_%s_%s_numerator_%s_%s_cfg.py" % (self.channel, charge_selection, hadTau_selection, absEtaBin)),
#            'outputFile' : os.path.join(
#              self.dirs[DKEY_PLOT], "makePlots_%s_%s_numerator_%s_%s.png" % (self.channel, charge_selection, hadTau_selection, absEtaBin)),
#            'histogramDir' : "jetToTauFakeRate_%s/numerator/%s/%s" % (charge_selection, hadTau_selection, absEtaBin),
#            'label' : None,
#            'make_plots_backgrounds' : [ "TT", "TTW", "TTZ", "EWK", "Rares" ],
#          }
#          self.createCfg_makePlots(self.jobOptions_make_plots[key_makePlots_job])
#########################################################

        lines_makefile = []
        self.addToMakefile_analyze(lines_makefile)
        self.addToMakefile_hadd_stage1(lines_makefile)
        #    self.addToMakefile_hadd_stage1_5(lines_makefile)
        self.addToMakefile_backgrounds_from_data(lines_makefile)
        self.addToMakefile_hadd_stage2(lines_makefile)
        self.addToMakefile_prep_dcard(lines_makefile)
        #    self.addToMakefile_comp_jetToTauFakeRate(lines_makefile)   ## TO BE IMPLEMENTED LATER
        #    self.addToMakefile_make_plots(lines_makefile)              ## TO BE IMPLEMENTED LATER
        self.targets = [
            outputFile for outputFile in self.outputFile_hadd_stage2.values()
        ]
        self.createMakefile(lines_makefile)

        logging.info("Done")
Exemplo n.º 31
0
  def create(self):
    """Creates all necessary config files and runs the complete analysis workfow -- either locally or on the batch system
    """

    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue

      process_name = sample_info["process_name_specific"]
      is_mc = (sample_info["type"] == "mc")

      logging.info("Building dictionaries for sample %s..." % process_name)
      for lepton_selection in self.lepton_selections:
        central_or_shift_extensions = ["", "hadd", "addBackgrounds"]
        central_or_shifts_extended = central_or_shift_extensions + self.central_or_shifts
        for central_or_shift_or_dummy in central_or_shifts_extended:
          process_name_extended = [ process_name, "hadd" ]
          for process_name_or_dummy in process_name_extended:
            if central_or_shift_or_dummy in [ "hadd", "addBackgrounds" ] and process_name_or_dummy in [ "hadd" ]:
              continue
            if central_or_shift_or_dummy != "central" and central_or_shift_or_dummy not in central_or_shift_extensions:
              if not is_mc:
                continue
              if not self.accept_central_or_shift(central_or_shift_or_dummy, sample_info):
                continue

            key_dir = getKey(process_name_or_dummy, lepton_selection, central_or_shift_or_dummy)
            for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_RLES ]:
              initDict(self.dirs, [ key_dir, dir_type ])
              if dir_type in [ DKEY_CFGS, DKEY_LOGS ]:
                self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel,
                  "_".join([ lepton_selection ]), process_name_or_dummy, central_or_shift_or_dummy)
              else:
                self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel,
                  "_".join([ lepton_selection ]), process_name_or_dummy)
    for subdirectory in [ "prepareDatacards" ]:
      key_dir = getKey(subdirectory)
      for dir_type in [ DKEY_CFGS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
        initDict(self.dirs, [ key_dir, dir_type ])
        if dir_type in [ DKEY_CFGS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT ]:
          self.dirs[key_dir][dir_type] = os.path.join(self.configDir, dir_type, self.channel, subdirectory)
        else:
          self.dirs[key_dir][dir_type] = os.path.join(self.outputDir, dir_type, self.channel, subdirectory)
    for dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_HIST, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_COMBINE_OUTPUT ]:
      initDict(self.dirs, [ dir_type ])
      if dir_type in [ DKEY_CFGS, DKEY_SCRIPTS, DKEY_LOGS, DKEY_DCRD, DKEY_PLOT, DKEY_HADD_RT, DKEY_COMBINE_OUTPUT ]:
        self.dirs[dir_type] = os.path.join(self.configDir, dir_type, self.channel)
      else:
        self.dirs[dir_type] = os.path.join(self.outputDir, dir_type, self.channel)

    numDirectories = 0
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        numDirectories += len(self.dirs[key])
      else:
        numDirectories += 1
    logging.info("Creating directory structure (numDirectories = %i)" % numDirectories)
    numDirectories_created = 0;
    frac = 1
    for key in self.dirs.keys():
      if type(self.dirs[key]) == dict:
        for dir_type in self.dirs[key].keys():
          create_if_not_exists(self.dirs[key][dir_type])
        numDirectories_created += len(self.dirs[key])
      else:
        create_if_not_exists(self.dirs[key])
        numDirectories_created = numDirectories_created + 1
      while 100*numDirectories_created >= frac*numDirectories:
        logging.info(" %i%% completed" % frac)
        frac = frac + 1
    logging.info("Done.")

    inputFileLists = {}
    for sample_name, sample_info in self.samples.items():
      if not sample_info["use_it"]:
        continue
      logging.info("Checking input files for sample %s" % sample_info["process_name_specific"])
      inputFileLists[sample_name] = generateInputFileList(sample_info, self.max_files_per_job)

    for lepton_selection in self.lepton_selections:
      for sample_name, sample_info in self.samples.items():
        if not sample_info["use_it"]:
          continue
        process_name = sample_info["process_name_specific"]
        
        logging.info("Creating configuration files to run '%s' for sample %s" % (self.executable_analyze, process_name))
        is_mc = (sample_info["type"] == "mc")
        inputFileList = inputFileLists[sample_name]
        for central_or_shift in self.central_or_shifts:
          if central_or_shift != "central" and not is_mc:
            continue

          # build config files for executing analysis code
          key_analyze_dir = getKey(process_name, lepton_selection, central_or_shift)

          for jobId in inputFileList.keys():
            analyze_job_tuple = (process_name, lepton_selection, central_or_shift, jobId)
            key_analyze_job = getKey(*analyze_job_tuple)
            ntupleFiles = inputFileList[jobId]
            if len(ntupleFiles) == 0:
              logging.warning("No input ntuples for %s --> skipping job !!" % (key_analyze_job))
              continue

            cfgFile_modified_path = os.path.join(self.dirs[key_analyze_dir][DKEY_CFGS], "analyze_%s_%s_%s_%i_cfg.py" % analyze_job_tuple)
            logFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_LOGS], "analyze_%s_%s_%s_%i.log" % analyze_job_tuple)
            rleOutputFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_RLES], "rle_%s_%s_%s_%i.txt" % analyze_job_tuple) \
                                 if self.select_rle_output else ""
            histogramFile_path = os.path.join(self.dirs[key_analyze_dir][DKEY_HIST], "analyze_%s_%s_%s_%i.root" % analyze_job_tuple)

            self.jobOptions_analyze[key_analyze_job] = {
              'ntupleFiles'              : ntupleFiles,
              'cfgFile_modified'         : cfgFile_modified_path,
              'histogramFile'            : histogramFile_path,
              'logFile'                  : logFile_path,
              'selEventsFileName_output' : rleOutputFile_path,
              'leptonSelection'          : lepton_selection,
              'applyFakeRateWeights'     : "disabled",
              'central_or_shift'         : central_or_shift,
            }
            self.createCfg_analyze(self.jobOptions_analyze[key_analyze_job], sample_info)

            # initialize input and output file names for hadd_stage1
            key_hadd_stage1_dir = getKey(process_name, lepton_selection)
            hadd_stage1_job_tuple = (process_name, lepton_selection)
            key_hadd_stage1_job = getKey(*hadd_stage1_job_tuple)
            if not key_hadd_stage1_job in self.inputFiles_hadd_stage1.keys():
              self.inputFiles_hadd_stage1[key_hadd_stage1_job] = []
            self.inputFiles_hadd_stage1[key_hadd_stage1_job].append(self.jobOptions_analyze[key_analyze_job]['histogramFile'])
            self.outputFile_hadd_stage1[key_hadd_stage1_job] = os.path.join(self.dirs[key_hadd_stage1_dir][DKEY_HIST],
                                                                            "hadd_stage1_%s_%s.root" % hadd_stage1_job_tuple)

        # initialize input and output file names for hadd_stage2
        key_hadd_stage1_job = getKey(process_name, lepton_selection)
        key_hadd_stage2_dir = getKey("hadd", lepton_selection)
        key_hadd_stage2_job = getKey(lepton_selection)
        if not key_hadd_stage2_job in self.inputFiles_hadd_stage2.keys():
          self.inputFiles_hadd_stage2[key_hadd_stage2_job] = []
        self.inputFiles_hadd_stage2[key_hadd_stage2_job].append(self.outputFile_hadd_stage1[key_hadd_stage1_job])
        self.outputFile_hadd_stage2[key_hadd_stage2_job] = os.path.join(self.dirs[key_hadd_stage2_dir][DKEY_HIST],
                                                                        "hadd_stage2_%s.root" % lepton_selection)

    logging.info("Creating configuration files to run 'prepareDatacards'")
    processesToCopy = []
    for process in self.prep_dcard_processesToCopy:
      processesToCopy.append(process)
    self.prep_dcard_processesToCopy = processesToCopy
    processesToCopy = []
    for process in self.prep_dcard_signals:
      processesToCopy.append(process)
    self.prep_dcard_signals = processesToCopy
    for histogramToFit in self.histograms_to_fit:
      key_hadd_stage2_job = getKey("Tight")
      key_prep_dcard_dir = getKey("prepareDatacards")
      prep_dcard_job_tuple = (self.channel, histogramToFit)
      key_prep_dcard_job = getKey(histogramToFit)
      datacardFile = os.path.join(self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_%s_%s.root" % prep_dcard_job_tuple)
      self.jobOptions_prep_dcard[key_prep_dcard_job] = {
        'inputFile' : self.outputFile_hadd_stage2[key_hadd_stage2_job],
        'cfgFile_modified' : os.path.join(self.dirs[key_prep_dcard_dir][DKEY_CFGS], "prepareDatacards_%s_%s_cfg.py" % prep_dcard_job_tuple),
        'datacardFile' : datacardFile,
        'histogramDir' : self.histogramDir_prep_dcard,
        'histogramToFit' : histogramToFit,
        'label' : None
      }
      self.createCfg_prep_dcard(self.jobOptions_prep_dcard[key_prep_dcard_job])

      jobOptions_makefile = copy.deepcopy(self.jobOptions_postFit)
      jobOptions_makefile['fit_result'] = os.path.join(
        self.dirs[DKEY_COMBINE_OUTPUT], 'fit_{}'.format(histogramToFit), jobOptions_makefile['target']
      )
      jobOptions_makefile['hadd_stage2'] = self.outputFile_hadd_stage2[key_hadd_stage2_job]
      jobOptions_makefile['prepare_datacard'] = datacardFile
      jobOptions_makefile['data_datacard'] = os.path.join(
        self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_data_%s_%s.root" % prep_dcard_job_tuple
      )
      jobOptions_makefile['pseudodata_datacard'] = os.path.join(
        self.dirs[key_prep_dcard_dir][DKEY_DCRD], "prepareDatacards_pseudodata_%s_%s.root" % prep_dcard_job_tuple
      )
      jobOptions_makefile['makefile'] = os.path.join(
        self.dirs[DKEY_COMBINE_OUTPUT], 'Makefile_{}'.format(histogramToFit)
      )
      jobOptions_makefile['stdout'] = os.path.join(
        self.dirs[DKEY_COMBINE_OUTPUT], 'stdout_{}.log'.format(histogramToFit)
      )
      self.createCfg_postFit(jobOptions_makefile)

    self.sbatchFile_analyze = os.path.join(self.dirs[DKEY_SCRIPTS], "sbatch_analyze_%s.py" % self.channel)
    if self.is_sbatch:
      logging.info("Creating script for submitting '%s' jobs to batch system" % self.executable_analyze)
      self.createScript_sbatch_analyze(self.executable_analyze, self.sbatchFile_analyze, self.jobOptions_analyze)

    logging.info("Creating Makefile")
    lines_makefile = []
    self.addToMakefile_analyze(lines_makefile)
    self.addToMakefile_hadd_stage1(lines_makefile)
    self.addToMakefile_hadd_stage2(lines_makefile, make_dependency = "phony_hadd_stage1")
    self.addToMakefile_prep_dcard(lines_makefile)
    self.addToMakefile_postFit(lines_makefile)
    self.createMakefile(lines_makefile)

    logging.info("Done.")

    return self.num_jobs
Exemplo n.º 32
0
    def __init__(
        self,
        configDir,
        outputDir,
        output_file,
        executable,
        projection_module,
        samples,
        max_files_per_job,
        era,
        plot,
        check_output_files,
        running_method,
        num_parallel_jobs,
        pool_id='',
        verbose=False,
        dry_run=False,
        use_home=False,
        submission_cmd=None,
    ):

        self.configDir = configDir
        self.outputDir = outputDir
        self.executable = executable
        self.projection_module = projection_module
        self.max_num_jobs = 200000
        self.samples = samples
        self.max_files_per_job = max_files_per_job
        self.era = era
        self.plot = plot
        self.check_output_files = check_output_files
        self.verbose = verbose
        self.dry_run = dry_run
        self.use_home = use_home
        if running_method.lower() not in ["sbatch", "makefile"]:
            raise ValueError("Invalid running method: %s" % running_method)

        self.running_method = running_method
        self.is_sbatch = self.running_method.lower() == "sbatch"
        self.is_makefile = not self.is_sbatch
        self.makefile = os.path.join(
            self.configDir, "Makefile_{}".format(self.projection_module))
        self.num_parallel_jobs = num_parallel_jobs
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        logging.info("Working directory is: %s" % self.workingDir)
        self.template_dir = os.path.join(os.getenv('CMSSW_BASE'), 'src',
                                         'tthAnalysis', 'HiggsToTauTau',
                                         'test', 'templates')
        logging.info("Templates directory is: %s" % self.template_dir)

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.outputDir)
        self.output_file = os.path.join(self.outputDir, output_file)
        self.stdout_file_path = os.path.join(
            self.configDir, "stdout_{}.log".format(self.projection_module))
        self.stderr_file_path = os.path.join(
            self.configDir, "stderr_{}.log".format(self.projection_module))
        self.sw_ver_file_cfg = os.path.join(
            self.configDir, "VERSION_{}.log".format(self.projection_module))
        self.sw_ver_file_out = os.path.join(
            self.outputDir, "VERSION_{}.log".format(self.projection_module))
        self.submission_out = os.path.join(self.configDir, "SUBMISSION.log")
        self.stdout_file_path, self.stderr_file_path, self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out = get_log_version(
            (self.stdout_file_path, self.stderr_file_path,
             self.sw_ver_file_cfg, self.sw_ver_file_out, self.submission_out))
        check_submission_cmd(self.submission_out, submission_cmd)

        self.sbatchFile_projection = os.path.join(
            self.configDir, "sbatch_{}.py".format(self.projection_module))
        self.cfgFiles_projection = {}
        self.logFiles_projection = {}
        self.scriptFiles_projection = {}
        self.jobOptions_sbatch = {}

        self.inputFiles = {}
        self.outputFiles_tmp = {}
        self.outputFiles = {}

        self.phoniesToAdd = []
        self.filesToClean = []
        self.targets = []

        self.makefile_target = "sbatch_{}".format(self.projection_module)

        self.dirs = {}
        all_dirs = [
            DKEY_CFGS, DKEY_HISTO_TMP, DKEY_HISTO, DKEY_PLOTS, DKEY_LOGS,
            DKEY_SCRIPTS, DKEY_HADD_RT
        ]
        cfg_dirs = [
            DKEY_CFGS, DKEY_LOGS, DKEY_PLOTS, DKEY_SCRIPTS, DKEY_HADD_RT
        ]

        ref_genWeightsFile = os.path.join(
            os.environ['CMSSW_BASE'], 'src', 'tthAnalysis', 'HiggsToTauTau',
            'data', 'refGenWeight_{}.txt'.format(self.era))
        self.ref_genWeights = load_refGenWeightsFromFile(
            ref_genWeightsFile) if projection_module != 'puHist' else {}

        for sample_name, sample_info in self.samples.items():
            if not sample_info['use_it']:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(process_name)
            for dir_type in all_dirs:
                if dir_type == DKEY_PLOTS:
                    continue
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in cfg_dirs:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, process_name)
        for dir_type in cfg_dirs:
            initDict(self.dirs, [dir_type])
            self.dirs[dir_type] = os.path.join(self.configDir, dir_type)

        self.cvmfs_error_log = {}
        self.num_jobs = {
            'hadd': 0,
            'project': 0,
            'plot': 0,
        }
Exemplo n.º 33
0
    def __init__(self,
                 configDir,
                 outputDir,
                 executable_preselNtuple,
                 channel,
                 samples,
                 era,
                 debug,
                 running_method,
                 rle_directory,
                 version,
                 num_parallel_jobs,
                 pool_id=''):

        self.configDir = configDir
        self.outputDir = outputDir
        self.executable_preselNtuple = executable_preselNtuple
        self.channel = channel
        self.max_files_per_job = 1
        self.max_num_jobs = 200000
        self.samples = samples
        self.era = era
        self.debug = debug
        assert (running_method.lower()
                in ["sbatch",
                    "makefile"]), "Invalid running method: %s" % running_method
        self.running_method = running_method
        self.is_sbatch = False
        self.is_makefile = False
        if self.running_method.lower() == "sbatch":
            self.is_sbatch = True
        else:
            self.is_makefile = True
        self.makefile = os.path.join(self.configDir,
                                     "Makefile_%s" % self.channel)
        self.num_parallel_jobs = num_parallel_jobs
        self.pool_id = pool_id if pool_id else uuid.uuid4()

        self.workingDir = os.getcwd()
        print "Working directory is: " + self.workingDir

        self.rle_directory = rle_directory
        self.version = version
        if self.rle_directory == 'default':
            self.rle_directory = os.path.join(
                ##os.path.join("/hdfs/local/ttH_2tau", getpass.getuser(), "ttHNtupleProduction", ERA, version),
                '/home',
                getpass.getuser(),
                'ttHAnalysis',
                self.era,
                self.version,
                'rles',
                self.channel)
        elif self.rle_directory:
            if not os.path.isdir(self.rle_directory):
                logging.error("No such directory: '{directory_name}'".format(
                    directory_name=self.rle_directory))
                sys.exit(1)

        create_if_not_exists(self.configDir)
        create_if_not_exists(self.outputDir)
        self.stdout_file = codecs.open(
            os.path.join(self.outputDir, "stdout_%s.log" % self.channel), 'w',
            'utf-8')
        self.stderr_file = codecs.open(
            os.path.join(self.outputDir, "stderr_%s.log" % self.channel), 'w',
            'utf-8')
        self.dirs = {}
        self.samples = samples
        self.cfgFiles_preselNtuple_modified = {}
        self.logFiles_preselNtuple = {}
        self.sbatchFile_preselNtuple = os.path.join(
            self.configDir, "sbatch_preselNtuple_%s.py" % self.channel)
        self.inputFiles = {}
        self.outputFiles = {}
        self.filesToClean = []

        for sample_name, sample_info in self.samples.items():
            if not sample_info["use_it"] or sample_info["sample_category"] in [
                    "additional_signal_overlap", "background_data_estimate"
            ]:
                continue
            process_name = sample_info["process_name_specific"]
            key_dir = getKey(sample_name)
            for dir_type in [DKEY_CFGS, DKEY_NTUPLES, DKEY_LOGS]:
                initDict(self.dirs, [key_dir, dir_type])
                if dir_type in [DKEY_CFGS]:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.configDir, dir_type, self.channel, process_name)
                else:
                    self.dirs[key_dir][dir_type] = os.path.join(
                        self.outputDir, dir_type, self.channel, process_name)
        ##print "self.dirs = ", self.dirs

        self.cvmfs_error_log = {}