示例#1
0
def find_parents(input_file, input_rles):
  """Locate the parent Ntuple(s) of ``input_file`` containing the given events.

  Args:
    input_file: path to a derived Ntuple; supported locations are /hdfs/local
      (post-processed Ntuples) and /hdfs/cms/store/user (CRAB output). Paths
      starting with '/store' are skipped entirely.
    input_rles: iterable of 'run:lumi:event' strings to look for in the
      parent candidates.

  Returns:
    A list of ``(parent_path, matched_rles)`` tuples; an empty list when
    ``input_file`` starts with '/store'.

  Raises:
    RuntimeError: if the file does not exist, an RLE string is malformed,
      the file name is not a valid Ntuple name, or no parent can be deduced.
  """
  # Bare LFNs ('/store/...') have no locally resolvable parent -- nothing to do.
  if input_file.startswith('/store'):
    return []
  if not hdfs.isfile(input_file):
    raise RuntimeError("No such file: %s" % input_file)
  if not all(RLE_REGEX.match(rle) for rle in input_rles):
    raise RuntimeError("Not all input run:lumi:event numbers conform to the expected format")

  # The Ntuple file name encodes a 1-based tree index (e.g. 'tree_42.root').
  input_file_basename = os.path.basename(input_file)
  tree_match = TREE_REGEX.match(input_file_basename)
  if not tree_match:
    raise RuntimeError("Not a valid Ntuple: %s" % input_file)

  tree_idx = int(tree_match.group('idx'))
  assert(tree_idx > 0)

  parent_candidates = []
  if input_file.startswith('/hdfs/local'):
    # Post-processed Ntuple: recover era, version and process name from fixed
    # positions in the path (the 11-component layout is enforced below).
    input_file_split = input_file.split(os.path.sep)
    assert(len(input_file_split) == 11)
    process_name = input_file_split[-3]
    version = input_file_split[-5]
    era = input_file_split[-6]

    # The version string ends with '<presel>_<nom>_<mode>'; peel the pieces
    # off from the right to recover the mode and the selection signifiers.
    modes = [ mode for mode in ALLOWED_MODES.keys() if version.endswith(mode) ]
    if len(modes) != 1:
      raise RuntimeError("Unable to deduce mode from input path: %s" % input_file)
    mode = modes[0]
    version_no_mode = version[:-len(mode) - 1]
    nom_signifier = version_no_mode.split('_')[-1]
    version_no_mode_nom = version_no_mode[:-len(nom_signifier) - 1]
    presel_signifier = version_no_mode_nom.split('_')[-1]
    sample_base = ALLOWED_MODES[mode]['base']
    sample_suffix = ALLOWED_MODES[mode]['suffix']
    # Preselected samples carry a 'preselected' marker in the dictionary
    # suffix; its placement depends on the mode.
    if presel_signifier == 'wPresel':
      sample_suffix = 'preselected_{}'.format(sample_suffix) if mode == 'all' else '{}_preselected'.format(sample_suffix)
    samples = load_samples(era, True, base = sample_base, suffix = sample_suffix)
    # Map the process name found in the path back onto its DBS key.
    dbs_key = ''
    for sample_key, sample_info in samples.items():
      if sample_key == 'sum_events':
        continue
      if sample_info['process_name_specific'] == process_name:
        dbs_key = sample_key
        break
    if not dbs_key:
      raise RuntimeError("Unable to find an entry from sample dictionary that corresponds to file: %s" % input_file)
    sample_nfiles = samples[dbs_key]['nof_files']
    if sample_nfiles < tree_idx:
      raise RuntimeError(
        "Tree index found from input path %s larger than expected number of Ntuples: %d" % (input_file, sample_nfiles)
      )
    # Load the dictionary describing the parent stage: same dictionary without
    # the 'preselected' marker for wPresel, the pre-processed one for woPresel.
    if presel_signifier == 'wPresel':
      parent_samples = load_samples(era, True, base = sample_base,
        suffix = sample_suffix.replace('preselected_', '').replace('_preselected', '')
      )
      parent_sample = parent_samples[dbs_key]

    elif presel_signifier == 'woPresel':
      parent_samples = load_samples(era, False, base = sample_base)
      parent_sample = parent_samples[dbs_key]
    else:
      raise RuntimeError("Invalid preselection signifier found from input file %s: %s" % (input_file, presel_signifier))
    parent_sample_nfiles = parent_sample['nof_files']
    parent_sample_path = parent_sample['local_paths'][0]['path']
    parent_sample_blacklist = parent_sample['local_paths'][0]['blacklist']
    assert(parent_sample_nfiles >= sample_nfiles)
    # Parent indices that are not blacklisted, i.e. files that actually exist.
    whitelisted_indices = [ idx for idx in range(1, parent_sample_nfiles + 1) if idx not in parent_sample_blacklist ]
    len_whitelisted_indices = len(whitelisted_indices)
    if len_whitelisted_indices == sample_nfiles:
      # it's 1-1 correspondence
      # Parent files live in 1000-file subdirectories named '%04d'.
      parent_candidate = os.path.join(parent_sample_path, "%04d" % (tree_idx // 1000), 'tree_%d.root' % tree_idx)
      rle_matches = has_rles(parent_candidate, input_rles)
      if len(rle_matches) == len(input_rles):
        parent_candidates.append((parent_candidate, rle_matches))
      else:
        raise RuntimeError("Unable to find parent for: %s" % input_file)
    elif len_whitelisted_indices > sample_nfiles:
      # partition
      # NOTE(review): several parents map to one child; this rebuilds what is
      # presumably the same chunking the production step used, then verifies
      # each parent in the chunk by RLE content -- confirm against producer.
      chunk_len = int(math.ceil(float(len_whitelisted_indices) / sample_nfiles))
      chunks = [ whitelisted_indices[idx:idx + chunk_len] for idx in range(0, len_whitelisted_indices, chunk_len) ]
      assert(len(chunks) == sample_nfiles)
      parent_chunk = chunks[tree_idx - 1]
      for parent_idx in parent_chunk:
        parent_candidate = os.path.join(parent_sample_path, "%04d" % (parent_idx // 1000), 'tree_%d.root' % parent_idx)
        rle_matches = has_rles(parent_candidate, input_rles)
        if rle_matches:
          parent_candidates.append((parent_candidate, rle_matches))
    else:
      raise RuntimeError("Fewer parent Ntuples than sibling Ntuples for the Ntuple: %s" % input_file)
  elif input_file.startswith('/hdfs/cms/store/user'):
    # CRAB output: read the FrameworkJobReport XML from the archived cmsRun
    # log to recover the input LFNs and the lumi sections each one covered.
    input_file_dirname = os.path.dirname(input_file)
    log_file = os.path.join(input_file_dirname, 'log', 'cmsRun_{}.log.tar.gz'.format(tree_idx))
    if hdfs.isfile(log_file):
      tar = tarfile.open(log_file, 'r:gz')
      tar_contents = tar.getnames()
      xml_filename = 'FrameworkJobReport-{}.xml'.format(tree_idx)
      if xml_filename in tar_contents:
        xml_tarmember = tar.getmember(xml_filename)
        xml_file = tar.extractfile(xml_tarmember)
        xml_contents = xml_file.read()
        xml_tree = ET.ElementTree(ET.fromstring(xml_contents))
        last_lfn = ''
        matched_ls = []
        # Map lumi section number -> original RLE string for O(1) lookup.
        # NOTE(review): assumes at most one requested RLE per lumi section.
        expected_ls = { int(rle.split(':')[1]) : rle for rle in input_rles }
        for elem in xml_tree.iter():
          # Stop at the 'Inputs' section, or once every RLE has been matched.
          if elem.tag == 'Inputs' or len(expected_ls) == len(matched_ls):
            break
          if elem.tag == 'LFN':
            # A new LFN begins: flush matches accumulated for the previous one.
            if last_lfn and matched_ls:
              parent_candidates.append((last_lfn, matched_ls))
            last_lfn = elem.text
            matched_ls = []
          elif elem.tag == 'LumiSection':
            ls = int(elem.attrib['ID'])
            if ls in expected_ls:
              matched_ls.append(expected_ls[ls])
        # Flush the matches belonging to the final LFN.
        if last_lfn and matched_ls:
          parent_candidates.append((last_lfn, matched_ls))
      tar.close()
  else:
    raise RuntimeError("Invalid path: %s" % input_file)
  return parent_candidates
示例#2
0
# Command-line options used by this script.
mode          = args.mode
files_per_job = args.files_per_job
validate      = args.validate
use_home      = args.use_home

# Custom arguments
output_file = args.output_file

# Use the arguments
# Substitute the era into the output file name if a placeholder is present.
if '{era}' in output_file:
  output_file = output_file.format(era = era)
version = "%s_%s" % (version, mode)


# Pick the sample dictionary matching the requested analysis mode.
if mode == 'sync':
  samples = load_samples(era, False, suffix = 'sync')
elif mode == 'all':
  samples = load_samples(era, False)
elif mode == 'hh':
  samples = load_samples(era, False, base = 'hh_multilepton')
elif mode == 'hh_bbww':
  samples = load_samples(era, False, base = 'hh_bbww')
else:
  raise ValueError('Invalid mode: %s' % mode)

# Enable every MC sample; data samples are left disabled.
for sample_name, sample_entry in samples.items():
  if sample_name == 'sum_events': continue
  sample_entry['use_it'] = sample_entry['type'] != 'data'

if __name__ == '__main__':
  if sample_filter:
示例#3
0
systematics_label  = args.systematics
use_nonnominal     = args.original_central
use_home           = args.use_home

# Custom arguments
output_tree = args.output_tree

# Use the arguments
central_or_shifts = []
for systematic_label in systematics_label:
  for central_or_shift in getattr(systematics, systematic_label):
    if central_or_shift not in central_or_shifts:
      central_or_shifts.append(central_or_shift)

if use_nonnominal:
  samples = load_samples(era, suffix = "sync")
else:
  raise ValueError("Implement me!")

if __name__ == '__main__':
  logging.basicConfig(
    stream = sys.stdout,
    level  = logging.INFO,
    format = '%(asctime)s - %(levelname)s: %(message)s',
  )

  if sample_filter:
    samples = filter_samples(samples, sample_filter)

  logging.info(
    "Running the jobs with the following systematic uncertainties enabled: %s" % \
示例#4
0
    'Cert_271036-284044_13TeV_23Sep2016ReReco_Collisions16_JSON.txt')
golden_json_2017 = os.path.join(
    os.environ['CMSSW_BASE'], 'src/tthAnalysis/NanoAOD/data',
    'Cert_294927-306462_13TeV_EOY2017ReReco_Collisions17_JSON_v1.txt')
golden_json_2018 = os.path.join(
    os.environ['CMSSW_BASE'], 'src/tthAnalysis/NanoAOD/data',
    'Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt'
)

# Use the arguments
version = "%s_w%sPresel_%s_%s" % (version, ("" if preselection else "o"),
                                  "nonNom" if use_nonnominal else "nom", mode)
gen_matching_by_index = (gen_matching == 'by_index')

if mode == 'sync':
    samples = load_samples(era, preselection, suffix='sync')
    pileup = os.path.join(
        os.environ['CMSSW_BASE'],
        'src/tthAnalysis/HiggsToTauTau/data/pileup_%s_sync.root' % era)
elif mode == 'leptonFR_sync':
    if preselection:
        raise ValueError(
            "Does not make sense to apply preselection to Ntuples used in lepton FR sync"
        )

    samples = load_samples(era, False, suffix='leptonFR_sync')
elif mode == 'hh_bbww_sync':
    if preselection:
        raise ValueError("Preselection not possible in mode: %s" % mode)

    samples = load_samples(era, False, base='hh_bbww', suffix='sync')
  chargeSumSelections = [ "SS" ]
else:
  raise ValueError("Invalid choice for the sideband: %s" % sideband)

MEMbranch = ''

hadTauWP_map = {
  'dR03mva' : 'Loose',
  'deepVSj' : 'VLoose',
}
hadTau_selection = tau_id + hadTauWP_map[tau_id]

MEMsample_base = "addMEM_3l1tau_{}".format(hadTau_selection)

if mode == "default":
  samples = load_samples(era, suffix = "preselected" if use_preselected else "")
elif mode == "addMEM":
  samples = load_samples(era, suffix = MEMsample_base)
  MEMbranch        = 'memObjects_3l_1tau_lepFakeable_tauTight_{}'.format(hadTau_selection)
elif mode == "forBDTtraining_beforeAddMEM":
  if use_preselected:
    raise ValueError("Makes no sense to use preselected samples w/ BDT training mode")
  samples = load_samples(era, suffix = "BDT")
  hadTauWP_map_relaxed = {
    'dR03mva' : 'VVLoose',
    'deepVSj' : 'VVLoose',
  }
  if args.tau_id_wp:
    tau_id = args.tau_id[:7]
  hadTau_selection_relaxed = tau_id + hadTauWP_map_relaxed[tau_id]
elif mode == "forBDTtraining_afterAddMEM":
            central_or_shifts.append(central_or_shift)
do_sync = mode.startswith('sync')
lumi = get_lumi(era)
jet_cleaning_by_index = (jet_cleaning == 'by_index')
gen_matching_by_index = (gen_matching == 'by_index')

lepton_charge_selections = ["OS", "SS"]

hadTauWP_veto_map = {
    'dR03mva': 'Loose',
    'deepVSj': 'Loose',
}
hadTau_selection_veto = tau_id + hadTauWP_veto_map[tau_id]

if mode == "default":
    samples = load_samples(era,
                           suffix="preselected" if use_preselected else "")
elif mode == "testSignal":
    samples = load_samples(era,
                           suffix="preselected" if use_preselected else "")
    for sample_name, sample_info in samples.items():
        if sample_name == 'sum_events': continue
        if sample_info["sample_category"] in [
                "HH",
                #"signal",
                "TTWH",
                "TTZH",
                "VH",
                "ggH",
                "qqH"
        ] and sample_info["use_it"] == True:
            sample_info["use_it"] = True
示例#7
0
args = parser.parse_args()

era = args.era
output_root_dir = args.root_output
output_plot_dir = args.plot_output

if era == '2016':
    from tthAnalysis.HiggsToTauTau.samples.stitch import samples_to_stitch_2016 as samples_to_stitch
elif era == '2017':
    from tthAnalysis.HiggsToTauTau.samples.stitch import samples_to_stitch_2017 as samples_to_stitch
elif era == '2018':
    from tthAnalysis.HiggsToTauTau.samples.stitch import samples_to_stitch_2018 as samples_to_stitch
else:
    raise RuntimeError("Invalid era: %s" % era)

samples = load_samples(era, is_postproc=False)

for output_dir in [output_root_dir, output_plot_dir]:
    create_if_not_exists(output_dir)

for sample_set_to_stich in samples_to_stitch:
    binning_keys = filter(lambda key: key != 'inclusive',
                          sample_set_to_stich.keys())

    sample_list = []
    for key in sample_set_to_stich:
        if key == 'inclusive':
            sample_list.extend(sample_set_to_stich[key]['samples'])
        else:
            for binned_sample in sample_set_to_stich[key]:
                sample_list.extend(binned_sample['samples'])
示例#8
0
        parent_dir = os.path.dirname(os.path.abspath(output_file))
        if not check_dir(parent_dir, use_force):
            sys.exit(1)

        logging.debug(
            "Saving RLE numbers from {input_file} to {output_file}".format(
                input_file=input_file,
                output_file=output_file,
            ))

        dump_rle(input_file, output_file, args.tree, args.run, args.lumi,
                 args.event)

    else:

        samples = load_samples(args.era, is_postproc=args.post_processed)

        output_dir = output
        if not check_dir(output_dir, use_force):
            sys.exit(1)

        idx = lambda x: int(x[x.rfind('_') + 1:x.rfind('.')])

        sample_keys = {
            v['process_name_specific']: k
            for k, v in samples.iteritems() if k != 'sum_events'
        }
        if args.sample:
            if args.sample not in sample_keys:
                raise ValueError("Unrecognized key: {sample_key}".format(
                    sample_key=args.sample))
示例#9
0
# Collect the requested systematic shifts, de-duplicated but order-preserving.
for systematic_label in systematics_label:
    for central_or_shift in getattr(systematics, systematic_label):
        if central_or_shift not in central_or_shifts:
            central_or_shifts.append(central_or_shift)

do_sync = mode.startswith('sync')
lumi = get_lumi(era)

# The stitching configuration is only needed outside of sync mode; resolve
# the era-specific dictionary by attribute name.
if mode != "sync":
    samples_to_stitch = getattr(
        importlib.import_module("tthAnalysis.HiggsToTauTau.samples.stitch"),
        "samples_to_stitch_{}".format(era))

# Pick the sample dictionary matching the requested run mode.
if mode == "default":
    samples = load_samples(era)
elif mode == "forBDTtraining":
    samples = load_samples(era, suffix="BDT")
    samples = load_samples_stitched(samples,
                                    era,
                                    load_dy=True,
                                    load_wjets=True)
elif mode == "sync":
    samples = load_samples(era, suffix="sync")
else:
    raise ValueError("Internal logic error")

if mode == "default" and len(central_or_shifts) <= 1:
    evtCategories = [
        "hh_bb1l", "hh_bb1l_resolvedHbb_resolvedWjj",
        "hh_bb1l_resolvedHbb_resolvedWjj_vbf",
示例#10
0
    if sample_info["nof_tree_events"] != sample_info["nof_db_events"]:
      missing_events = sample_info["nof_db_events"] - sample_info["nof_tree_events"]
      assert(missing_events > 0)
      print("{} {} ({:.1f}%)".format(dbs_name, missing_events, missing_events * 100. / sample_info["nof_db_events"]))

if __name__ == '__main__':
  # Stand-alone entry point: parse the CLI options selecting the sample
  # dictionary, then run the validation routine over it.
  parser = argparse.ArgumentParser(
    formatter_class = lambda prog: SmartFormatter(prog, max_help_position = 55)
  )
  parser.add_argument('-e', '--era',
    type = str, dest = 'era', metavar = 'year', required = True, choices = [ '2016', '2017', '2018' ],
    help = 'R|Era',
  )
  parser.add_argument('-p', '--postproc',
    dest = 'postproc', action = 'store_true', default = False, required = False,
    help = 'R|Validate sample dictionary for post-processed Ntuples',
  )
  parser.add_argument('-b', '--base',
    type = str, dest = 'base', metavar = 'string', required = False, choices = [ 'tth', 'hh_multilepton', 'hh_bbww' ],
    default = 'tth',
    help = 'R|Choice of analysis',
  )
  parser.add_argument('-s', '--suffix',
    type = str, dest = 'suffix', metavar = 'string', required = False, default = '',
    help = 'R|Suffix in the name of sample dictionary',
  )
  args = parser.parse_args()

  # Load the requested sample dictionary and validate it.
  samples = load_samples(era = args.era, is_postproc = args.postproc, base = args.base, suffix = args.suffix)
  validate(samples)
示例#11
0
validate = args.validate
use_home = args.use_home

# Custom arguments
output_file = args.output_file

# Use the arguments
if output_file == default_output:
    output_file = output_file.format(projection=projection, era=era)
version = "%s_%s" % (version, mode)

if projection == 'pileup':
    projection_module = "puHist"

    if mode == 'all':
        samples = load_samples(era, False, base='all')
    elif mode == 'tth':
        samples = load_samples(era, False)
    elif mode == 'tth_sync':
        samples = load_samples(era, False, suffix='sync')
    elif mode == 'hh':
        samples = load_samples(era, False, base='hh_multilepton')
    elif mode == 'hh_bbww':
        samples = load_samples(era, False, base='hh_bbww')
    elif mode == 'hh_bbww_ttbar':
        samples = load_samples(era, False, base='hh_bbww', suffix='ttbar')
    elif mode == 'hh_bbww_sync':
        samples = load_samples(era, False, base='hh_bbww', suffix='sync')
    elif mode == 'hh_bbww_sync_ttbar':
        samples = load_samples(era, False, base='hh_bbww', suffix='sync_ttbar')
    else:
示例#12
0
    "hhAnalysis.multilepton.samples.metaDict_{}{}".format(era, hh_suffix))
module_hh_bbww = importlib.import_module(
    "hhAnalysis.bbww.samples.metaDict_{}_hh".format(era))
module_hh_bbww_ttbar = importlib.import_module(
    "hhAnalysis.bbww.samples.metaDict_{}_ttbar".format(era))

metaDict_tth = getattr(module_tth, METADICT)
metaDict_hh_multilepton = getattr(module_hh_multilepton, METADICT)
metaDict_hh_bbww = getattr(module_hh_bbww, METADICT)
metaDict_hh_bbww_ttbar = getattr(module_hh_bbww_ttbar, METADICT)
metaDict = collections.OrderedDict(
    itertools.chain(metaDict_tth.items(), metaDict_hh_multilepton.items(),
                    metaDict_hh_bbww.items(), metaDict_hh_bbww_ttbar.items()))
dbs_names_metaDict = list(metaDict.keys())

samples_pre_processed = load_samples(era, False, 'all')
samples_post_processed = load_samples(era, True, 'all')

samples_tth_skimmed = load_samples(era, True, suffix='preselected_base')
samples_hh_bbww_ttbar_skimmed = load_samples(era,
                                             True,
                                             base='hh_bbww',
                                             suffix='ttbar_preselected')
del samples_tth_skimmed['sum_events']
del samples_hh_bbww_ttbar_skimmed['sum_events']
samples_skimmed = collections.OrderedDict(
    itertools.chain(samples_tth_skimmed.items(),
                    samples_hh_bbww_ttbar_skimmed.items()))

if comparison == 'pre':
    samples_before = metaDict
示例#13
0
    os.environ['CMSSW_BASE'], 'src/tthAnalysis/NanoAOD/data',
    'Cert_314472-325175_13TeV_17SeptEarlyReReco2018ABC_PromptEraD_Collisions18_JSON.txt'
)

if not preselection and lep_mva_wp != 'default':
    raise ValueError(
        "Non-default lepton selection can only be used in the skimming")

# Use the arguments
version = "%s_w%sPresel_%s_%s" % (version, ("" if preselection else "o"),
                                  "nonNom" if use_nonnominal else "nom", mode)
gen_matching_by_index = (gen_matching == 'by_index')
do_sync = 'sync' in mode

if mode == 'all':
    samples = load_samples(era, preselection, base='all')
elif mode == 'tth':
    samples = load_samples(era,
                           preselection,
                           suffix='base' if preselection else '')
elif mode == 'tth_sync':
    samples = load_samples(era, preselection, suffix='sync')
    pileup = os.path.join(
        os.environ['CMSSW_BASE'],
        'src/tthAnalysis/HiggsToTauTau/data/pileup_%s_sync.root' % era)
elif mode == 'leptonFR_sync':
    if preselection:
        raise ValueError(
            "Does not make sense to apply preselection to Ntuples used in lepton FR sync"
        )
    'dR03mva': 'Medium',
    'deepVSj': 'Medium',
}
hadTau_selection_veto = tau_id + hadTauWP_veto_map[tau_id]

if sideband == 'disabled':
    chargeSumSelections = ["OS"]
elif sideband == 'enabled':
    chargeSumSelections = ["OS", "SS"]
elif sideband == 'only':
    chargeSumSelections = ["SS"]
else:
    raise ValueError("Invalid choice for the sideband: %s" % sideband)

if mode == "default":
    samples = load_samples(era,
                           suffix="preselected" if use_preselected else "")
elif mode == "addMEM":
    if not use_preselected:
        raise ValueError(
            "MEM branches can be read only from preselected Ntuples")
    samples = load_samples(era, suffix="addMEM_2lss1tau")
    MEMbranch = 'memObjects_2lss_1tau_lepFakeable_tauTight_{}'.format(
        hadTau_selection)
elif mode == "forBDTtraining_beforeAddMEM":
    if use_preselected:
        raise ValueError(
            "Makes no sense to use preselected samples w/ BDT training mode")
    samples = load_samples(era, suffix="BDT")
    if args.tau_id_wp:
        tau_id = args.tau_id[:7]
    hadTau_selection_relaxed = tau_id + hadTauWP_map[tau_id]
示例#15
0
hadTau_WP_map = {
    'dR03mva': 'Loose',
    'deepVSj': 'Loose',
}
hadTau_WP = tau_id + hadTau_WP_map[tau_id]
if tau_id_wp:
    hadTau_WP = tau_id_wp

# Use the arguments
hadTau_selection = "Tight|%s" % hadTau_WP
lumi = get_lumi(era)
jet_cleaning_by_index = (jet_cleaning == 'by_index')
gen_matching_by_index = (gen_matching == 'by_index')

samples = load_samples(era, suffix="BDT")

if __name__ == '__main__':
    if sample_filter:
        samples = filter_samples(samples, sample_filter)

    analysis = analyzeConfig_hadTopTagger(
        configDir=os.path.join("/home", getpass.getuser(), "ttHAnalysis", era,
                               version),
        outputDir=os.path.join("/hdfs/local", getpass.getuser(), "ttHAnalysis",
                               era, version),
        executable_analyze="analyze_hadTopTagger",
        cfgFile_analyze="analyze_hadTopTagger_cfg.py",
        samples=samples,
        jet_cleaning_by_index=jet_cleaning_by_index,
        gen_matching_by_index=gen_matching_by_index,
示例#16
0
jet_cleaning = args.jet_cleaning
gen_matching = args.gen_matching
use_stitched = args.use_stitched

# Use the arguments
# Collect the requested systematic shifts, de-duplicated but order-preserving.
central_or_shifts = []
for systematic_label in systematics_label:
    for central_or_shift in getattr(systematics, systematic_label):
        if central_or_shift not in central_or_shifts:
            central_or_shifts.append(central_or_shift)
lumi = get_lumi(era)
jet_cleaning_by_index = (jet_cleaning == 'by_index')
gen_matching_by_index = (gen_matching == 'by_index')

# Pick the sample dictionary matching the requested run mode.
if mode == 'default':
    samples = load_samples(era)
elif mode == 'sync':
    samples = load_samples(era, suffix='leptonFR_sync')
else:
    raise ValueError('Invalid mode: %s' % mode)

# NOTE(review): presumably swaps inclusive DY/W+jets samples for their
# stitched counterparts when requested -- confirm with load_samples_stitched.
samples = load_samples_stitched(samples,
                                era,
                                load_dy='dy' in use_stitched,
                                load_wjets='wjets' in use_stitched)
# Force a fixed single/double-lepton trigger list on every MC sample.
for sample_name, sample_info in samples.items():
    if sample_name == 'sum_events': continue

    if sample_info["type"] == "mc":
        sample_info["triggers"] = ["1e", "1mu", "2e", "2mu"]
  for central_or_shift in getattr(systematics, systematic_label):
    if central_or_shift not in central_or_shifts:
      central_or_shifts.append(central_or_shift)
do_sync = mode.startswith('sync')
lumi = get_lumi(era)
jet_cleaning_by_index = (jet_cleaning == 'by_index')
gen_matching_by_index = (gen_matching == 'by_index')

hadTauWP_veto_map = {
  'dR03mva' : 'Loose',
  'deepVSj' : 'Loose',
}
hadTau_selection_veto = tau_id + hadTauWP_veto_map[tau_id]

if mode == 'default':
  samples = load_samples(era, suffix = "preselected" if use_preselected else "")
elif mode == 'sync_wMEM':
  samples = load_samples(era, suffix = 'addMEM_3l_sync' if use_nonnominal else 'addMEM_3l_sync_nom')
elif mode == 'sync':
  sample_suffix = "sync" if use_nonnominal else "sync_nom"
  if use_preselected:
    sample_suffix = "preselected_{}".format(sample_suffix)
  samples = load_samples(era, suffix = sample_suffix)
else:
  raise ValueError("Invalid mode: %s" % mode)

if __name__ == '__main__':
  logging.info(
    "Running the jobs with the following systematic uncertainties enabled: %s" % \
    ', '.join(central_or_shifts)
  )
示例#18
0
def load_sample(era):
  """Return the sample dictionary for *era* together with its stitching map."""
  # Resolve the era-specific stitching dictionary by attribute name.
  stitching = getattr(
    importlib.import_module("tthAnalysis.HiggsToTauTau.samples.stitch"),
    "samples_to_stitch_{}".format(era)
  )
  return load_samples(era), stitching
示例#19
0
# Additional arguments
mode          = args.mode
files_per_job = args.files_per_job
validate      = args.validate
use_home      = args.use_home
use_preproc   = args.use_preprocessed

# Custom arguments
output_file = args.output_file

# Use the arguments
# Substitute the era into the output file name if a placeholder is present.
if '{era}' in output_file:
  output_file = output_file.format(era = era)

# Pick the sample dictionary for the requested HH analysis mode; the
# post-processing flag passed to load_samples is the inverse of --use_preprocessed.
if mode == 'hh_multilepton':
  samples = load_samples(era, not use_preproc, base = 'hh_multilepton')
elif mode == 'hh_bbww':
  samples = load_samples(era, not use_preproc, base = 'hh_bbww')
elif mode == 'hh_bbww_sync':
  samples = load_samples(era, not use_preproc, base = 'hh_bbww', suffix = 'sync')
else:
  raise ValueError('Invalid mode: %s' % mode)

# Outside of sync mode, enable only the non-resonant signal samples.
if mode != 'hh_bbww_sync':
  for sample_name, sample_entry in samples.items():
    if sample_name == 'sum_events': continue
    sample_entry['use_it'] = is_nonresonant(sample_entry['sample_category'])

if __name__ == '__main__':
  # Optionally restrict the dictionary to the samples requested on the CLI.
  if sample_filter:
    samples = filter_samples(samples, sample_filter)
示例#20
0
# Default output location inside the CMSSW source tree, parameterized by era.
if not output:
    output = os.path.join(os.environ['CMSSW_BASE'], 'src', 'tthAnalysis',
                          'HiggsToTauTau', 'data',
                          'stitched_weights_lo_{}.root'.format(era))

# Pick the era-specific LO stitching configuration.
if era == '2016':
    from tthAnalysis.HiggsToTauTau.samples.stitch import samples_to_stitch_lo_2016 as samples_to_stitch
elif era == '2017':
    from tthAnalysis.HiggsToTauTau.samples.stitch import samples_to_stitch_lo_2017 as samples_to_stitch
elif era == '2018':
    from tthAnalysis.HiggsToTauTau.samples.stitch import samples_to_stitch_lo_2018 as samples_to_stitch
else:
    raise RuntimeError("Invalid era: %s" % era)

samples = load_samples(era)

apply_sf = True
fp = ROOT.TFile.Open(output, 'recreate')
# For each stitching group: compute the 1D weights per binning variable and,
# when two binning variables are present, also the combined 2D weights.
for sample_to_stitch in samples_to_stitch:
    binning_vars = sorted([var for var in sample_to_stitch['exclusive']],
                          reverse=True)
    if len(binning_vars) == 1:
        comp_weights_1(fp, samples, sample_to_stitch, binning_vars[0],
                       apply_sf)
    elif len(binning_vars) == 2:
        for binning_var in binning_vars:
            comp_weights_1(fp, samples, sample_to_stitch, binning_var,
                           apply_sf)
        comp_weights_2(fp, samples, sample_to_stitch, binning_vars[0],
                       binning_vars[1], apply_sf)
示例#21
0
if not output:
    output = os.path.join(os.environ['CMSSW_BASE'], 'src', 'tthAnalysis',
                          'HiggsToTauTau', 'data',
                          'stitched_weights_{}.root'.format(era))

if era == '2016':
    from tthAnalysis.HiggsToTauTau.samples.stitch import samples_to_stitch_2016 as samples_to_stitch
elif era == '2017':
    from tthAnalysis.HiggsToTauTau.samples.stitch import samples_to_stitch_2017 as samples_to_stitch
elif era == '2018':
    from tthAnalysis.HiggsToTauTau.samples.stitch import samples_to_stitch_2018 as samples_to_stitch
else:
    raise RuntimeError("Invalid era: %s" % era)

if do_wjet_parts:
    samples = load_samples(era, base='hh_multilepton', suffix='wjets')
else:
    samples = load_samples(era)

apply_sf = True
fp = ROOT.TFile.Open(output, 'recreate')
for sample_to_stitch in samples_to_stitch:
    binning_vars = [var for var in sample_to_stitch if var != 'inclusive']
    if len(binning_vars) == 1:
        comp_weights_1(fp, samples, sample_to_stitch, binning_vars[0],
                       apply_sf)
    elif len(binning_vars) == 2:
        for binning_var in binning_vars:
            comp_weights_1(fp, samples, sample_to_stitch, binning_var,
                           apply_sf)
        comp_weights_2(fp, samples, sample_to_stitch, binning_vars[0],
示例#22
0
    assert (os.path.isfile(hist_file))
    hist_title = "H1bin4"

    norm = []
    for scan_idx in range(len(klJHEP)):
        norm.append(
            model.getNormalization(
                klJHEP[scan_idx],
                ktJHEP[scan_idx],
                c2JHEP[scan_idx],
                cgJHEP[scan_idx],
                c2gJHEP[scan_idx],
                hist_file,
                hist_title,
            ))
    samples = load_samples(era, False, base='hh_{}'.format(analysis_type))
    denom_file = os.path.join(
        cmssw_base, 'src/hhAnalysis/bbww/data/denom_{}.root'.format(era))
    fileHH = ROOT.TFile(denom_file, 'read')

    weight_sums = {}
    for dbs_name, sample_info in samples.items():
        if dbs_name == 'sum_events':
            continue
        category = sample_info['sample_category']
        if not is_nonresonant(category):
            continue
        if category not in weight_sums:
            weight_sums[category] = {
                scan_idx: []
                for scan_idx in range(nof_weights)
    'dR03mva': 'VLoose',
    'deepVSj': 'VLoose',
}
hadTau_WP = tau_id + hadTau_WP_map[tau_id]
if tau_id_wp:
    hadTau_WP = tau_id_wp

hadTau_WP_ak8_map = {
    'dR03mva': 'Medium',
    'deepVSj': 'Medium',
}
hadTau_WP_ak8 = tau_id + hadTau_WP_ak8_map[tau_id]
if tau_wp_ak8:
    hadTau_WP_ak8 = tau_wp_ak8

samples = load_samples(era, suffix="sync" if use_nonnominal else "sync_nom")

if __name__ == '__main__':
    if sample_filter:
        samples = filter_samples(samples, sample_filter)

    logging.info(
      "Running the jobs with the following systematic uncertainties enabled: %s" % \
      ', '.join(central_or_shifts)
    )

    configDir = os.path.join("/home", getpass.getuser(), "ttHAnalysis", era,
                             version)
    outputDir = os.path.join("/hdfs/local", getpass.getuser(), "ttHAnalysis",
                             era, version)