def test_archive():
    # try some of the different __init__
    archive = dir_archive(cached=False)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, fast=True)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, compression=3)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, memmode='r+')
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, serialized=False)
    check_basic(archive)
    #check_numpy(archive) #FIXME: see issue #53
    rmtree('memo')
def draw_skeleton(path_GT, path_pred, path_visual, name, MPII):
    # =========================================================================
    # Draw skeleton based on model predictions
    # =========================================================================
    prediction = klepto.dir_archive(path_pred, cached=False)
    prediction.load()
    archive = klepto.dir_archive(path_GT, cached=False)
    archive.load()
    img = archive[name]['img'].astype('uint8')
    heatmap = prediction[name]

    # define connections between joints for each dataset
    if MPII:
        lines = [(0,1),(1,2),(2,6),(6,3),(3,4),(4,5),(6,7),(7,8),(8,9),
                 (10,11),(11,12),(12,7),(7,13),(13,14),(14,15)]
    else:
        lines = [(0,1),(1,2),(3,4),(4,5),(6,7),(7,8),(8,9),(9,10),(10,11),
                 (2,8),(3,9),(12,13)]

    coords = dict(enumerate(list(rescale_joint_coords(heatmap))))
    for points in lines:
        if coords[points[0]] == (0,0) or coords[points[1]] == (0,0):
            continue
        else:
            cv2.line(img, coords[points[0]], coords[points[1]],
                     (rand(0,255), rand(0,255), rand(0,255)),
                     thickness=2, lineType=8)

    plt.imshow(img)
    plt.imsave(path_visual + 'Skeleton.png', img)
def test_archive():
    # try some of the different __init__
    archive = dir_archive(cached=False)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, fast=True)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, compression=3)
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, memmode='r+')
    check_basic(archive)
    check_numpy(archive)
    #rmtree('memo')

    archive = dir_archive(cached=False, serialized=False)
    check_basic(archive)
    check_numpy(archive)
    rmtree('memo')
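The two test_archive variants above cycle through dir_archive's constructor flags. Below is a minimal sketch of what those flags control, assuming klepto and pox are installed; the directory name 'demo' and the stored value are illustrative, and the per-flag notes follow the comments in the tests ("with _pickle", "with dill", "with import") rather than authoritative documentation.

from klepto.archives import dir_archive
from pox import rmtree

# cached=False: read and write straight to the on-disk archive,
# with no in-memory cache front-end
archive = dir_archive('demo', cached=False)
archive['key'] = [1, 2, 3]
assert archive['key'] == [1, 2, 3]

# flags exercised by the tests above:
#   fast=True         -- pickle with the stdlib pickler instead of dill
#   compression=3     -- compress stored entries (level 3 of 0-9)
#   memmode='r+'      -- memory-map stored numpy arrays with this access mode
#   serialized=False  -- store values as importable python source, not pickles
rmtree('demo')  # remove the archive directory when done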
def prediction(path_GT, path_pred, mymodel):
    # =========================================================================
    # Compute Prediction of image
    # =========================================================================
    prediction = klepto.dir_archive(path_pred, {}, cached=False)
    archive = klepto.dir_archive(path_GT, cached=False)
    archive.load()
    for name in archive.keys():
        img = archive[name]['img'].reshape(1, w_pic, h_pic, 3)
        predict_heat = mymodel.predict(img / 255)
        prediction[name] = predict_heat
def PCK(path_GT, path_pred, njoints):
    # =========================================================================
    # Compute the PCK metric
    # =========================================================================
    prediction_set = klepto.dir_archive(path_pred, cached=False)
    prediction_set.load()
    gt_maps = klepto.dir_archive(path_GT, cached=False)
    gt_maps.load()
    accuracy = [0] * njoints
    for name in prediction_set.keys():
        accuracy = accuracy_pred(prediction_set[name],
                                 gt_maps[name]['joints'], accuracy)
    return np.array(accuracy) / len(prediction_set)
def test_foo():
    # start fresh
    rmtree('foo', ignore_errors=True)

    d = dir_archive('foo', cached=False)
    key = '1234TESTMETESTMETESTME1234'
    d._mkdir(key)
    #XXX: repeat mkdir does nothing, should it clear? I think not.
    _dir = d._mkdir(key)
    assert d._getdir(key) == _dir
    d._rmdir(key)

    # with _pickle
    x = [1,2,3,4,5]
    d._fast = True
    d[key] = x
    assert d[key] == x
    d._rmdir(key)

    # with dill
    d._fast = False
    d[key] = x
    assert d[key] == x
    d._rmdir(key)

    # with import
    d._serialized = False
    d[key] = x
    assert d[key] == x
    d._rmdir(key)
    d._serialized = True

    try:
        import numpy as np
        y = np.array(x)

        # with _pickle
        d._fast = True
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)

        # with dill
        d._fast = False
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)

        # with import
        d._serialized = False
        d[key] = y
        assert all(d[key] == y)
        d._rmdir(key)
        d._serialized = True
    except ImportError:
        pass

    # clean up
    rmtree('foo')
def train_data_generator(path, batch_size, inres=(h_pic, w_pic),
                         outres=(h_heat, w_heat)):
    # =========================================================================
    # Create data generator
    # =========================================================================
    archive_train = klepto.dir_archive(path, cached=False)
    archive_train.load()
    all_images = np.array(list(archive_train.keys()))
    size = len(all_images)
    while True:
        # take random images
        names = np.random.permutation(list(archive_train.keys()))
        num_of_batches = size // batch_size
        for im in range(num_of_batches):
            gt_stack = np.zeros(shape=(batch_size, outres[0], outres[1], nOutput))
            img_stack = np.zeros(shape=(batch_size, inres[0], inres[1], 3))
            selected_photo_names = names[im * batch_size:(im + 1) * batch_size]
            for j in range(len(selected_photo_names)):
                gt_stack[j, :, :, :] = np.transpose(
                    np.array(archive_train[selected_photo_names[j]]['joints']),
                    (1, 2, 0))
                img_stack[j, :, :, :] = archive_train[
                    selected_photo_names[j]]['img'] / 255.
            yield (img_stack, gt_stack)
def main():
    args = parse_commandline()
    klepto = True
    if klepto:
        acc = dir_archive(args.inpath, serialized=True, compression=0,
                          memsize=1e3)
        acc.load('recoil')
        acc.load('mjj')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')
    else:
        acc = acc_from_dir(args.inpath)

    outdir = pjoin('./output/', os.path.basename(args.inpath))
    if args.channel == 'monojet':
        from legacy_monojet import legacy_limit_input_monojet
        legacy_limit_input_monojet(acc, outdir=outdir)
    elif args.channel == 'monov':
        from legacy_monov import legacy_limit_input_monov
        legacy_limit_input_monov(acc, outdir=outdir)
    elif args.channel == 'vbfhinv':
        from legacy_vbf import legacy_limit_input_vbf
        legacy_limit_input_vbf(acc, outdir=outdir)
def main():
    args = parse_commandline()
    klepto = True
    if klepto:
        acc = dir_archive(args.inpath, serialized=True, compression=0,
                          memsize=1e3)
        acc.load('recoil')
        acc.load('mjj')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')
    else:
        acc = acc_from_dir(args.inpath)

    args.outdir = pjoin('./output/',
                        list(filter(lambda x: x, args.inpath.split('/')))[-1])
    for channel in args.channel.split(','):
        print(channel)
        if channel == 'monojet':
            from legacy_monojet import legacy_limit_input_monojet
            legacy_limit_input_monojet(acc, args)
        elif channel == 'monov':
            from legacy_monov import legacy_limit_input_monov
            legacy_limit_input_monov(acc, args)
        elif channel == 'vbfhinv':
            from legacy_vbf import legacy_limit_input_vbf
            legacy_limit_input_vbf(acc, outdir=args.outdir, unblind=args.unblind)
def main():
    inpath = sys.argv[1]
    #acc = acc_from_dir("./input/2019-10-07_das_lhevpt_dressed_v1")
    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3
    )
    acc.load('sumw')
    acc.load('sumw2')

    outputrootfile = uproot.recreate(f'2017_gen_v_pt_qcd_sf.root')
    sf_1d(acc, tag='wjet', regex='W.*', outputrootfile=outputrootfile)
    sf_1d(acc, tag='dy', regex='.*DY.*', outputrootfile=outputrootfile)
    # outputrootfile = uproot.recreate(f'test.root')
    sf_2d(acc, tag='wjet', regex='W.*', pt_type='dress',
          outputrootfile=outputrootfile)
    sf_2d(acc, tag='dy', regex='.*DY.*', pt_type='dress',
          outputrootfile=outputrootfile)
    sf_1d(acc, tag='gjets', regex='G\d?Jet.*', outputrootfile=outputrootfile)
    # outputrootfile = uproot.recreate('test.root')
    sf_2d(acc, tag='gjets', regex='G\d?Jet.*', pt_type='stat1',
          outputrootfile=outputrootfile)
def load_big_data(fpath, fname):
    """
    https://stackoverflow.com/questions/17513036/pickle-dump-huge-file-without-memory-error
    """
    arch = dir_archive(fpath + fname, cached=False, serialized=True)
    arch.load(fname)
    return arch[fname]
def save_big_data(fpath, fname, data):
    """
    https://stackoverflow.com/questions/17513036/pickle-dump-huge-file-without-memory-error
    """
    arch = dir_archive(fpath + fname, cached=False, serialized=True)
    arch[fname] = data
    # dump from memory cache to the on-disk archive
    arch.dump()
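A minimal round-trip sketch for the save/load pair above; the path and the payload are illustrative.

import numpy as np

data = np.arange(10**6)                         # any large, picklable object
save_big_data('/tmp/', 'big_array', data)       # writes the archive /tmp/big_array to disk
restored = load_big_data('/tmp/', 'big_array')  # reads it back entry by entry
assert (restored == data).all()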
def __init__(self, path):
    # store information
    self.path = path
    self.arch = archives.dir_archive(self.path)
    # load new data in archive file
    self.arch.load()
def klepto_load(inpath):
    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3,
    )
    return acc
def main():
    inpath_vbf = rebsmear_path(
        'submission/vbfhinv/merged_2021-06-11_vbfhinv_ULv8_05Feb21_rebsmear_CR'
    )
    inpath_rs = rebsmear_path(
        'submission/merged_2021-06-11_rebsmear_privatePS')

    acc_vbf = dir_archive(inpath_vbf)
    acc_vbf.load('sumw')
    acc_vbf.load('sumw_pileup')
    acc_vbf.load('nevents')

    h_qcd = extract_yields_in_cr(acc_vbf, distribution='mjj')

    # Rebalance and smear output
    acc_rs = dir_archive(inpath_rs)

    plot_rebsmear_prediction(acc_rs, h_qcd)
def klepto_load(loc):
    '''
    for loading the dumped dictionaries
    :return: loaded dictionary
    '''
    dic = dir_archive(loc, {}, serialized=True)
    dic.load()
    print('dictionary loaded')
    return dic
def read_archive(filename, axis=None): #NOTE: could return iterators
    """read 'parameters' and 'cost' from klepto.dir_archive

    Inputs:
        filename: str, path to location of klepto.archives.dir_archive
        axis: int, the desired index of the tuple-valued dataset [0,N]
    """
    from klepto.archives import dir_archive
    arch = dir_archive(filename, cached=True)
    return for_monitor(arch, axis=axis)
def klepto_dump(merged_dict, loc):
    '''
    to dump the merged dictionary file
    :param merged_dict: the final merged dictionary obtained
    :return: None
    '''
    demo = dir_archive(loc, merged_dict, serialized=True)
    demo.dump()
    del demo
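A minimal sketch pairing klepto_dump with the klepto_load helper shown earlier; the location and dictionary are illustrative, and the final assertion assumes the loaded archive behaves as a dict, which klepto's cached front-end does.

merged = {'a': 1, 'b': 2}
klepto_dump(merged, '/tmp/merged_dict')    # writes each entry to the archive directory
loaded = klepto_load('/tmp/merged_dict')   # prints 'dictionary loaded'
assert dict(loaded) == merged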
def met_trigger_eff(distribution):
    if distribution == 'mjj':
        tag = '120pfht_mu_mjj'
    elif distribution == 'recoil':
        tag = '120pfht_mu_recoil'

    indir = '/afs/cern.ch/user/a/aakpinar/bucoffea/bucoffea/submission/2019-11-13_vbf_trigger_recoil'
    acc = dir_archive(indir, serialized=True, compression=0, memsize=1e3)

    # Pre-load necessary information
    acc.load('recoil')
    acc.load('sumw')
    acc.load('sumw2')

    for year in [2017, 2018]:
        for jeteta_config in ['two_central_jets', 'two_forward_jets',
                              'one_jet_forward_one_jet_central']:
            # Single muon CR
            region_tag = '1m'
            for dataset in ['WJetsToLNu_HT_MLM', 'SingleMuon']:
                plot_recoil(acc,
                            region_tag=region_tag,
                            distribution=distribution,
                            axis_name=distribution,
                            dataset=dataset,
                            year=year,
                            tag=tag,
                            jeteta_config=jeteta_config,
                            output_format='pdf')

            # Double muon CR
            region_tag = '2m'
            for dataset in ['VDYJetsToLL_M-50_HT_MLM', 'SingleMuon']:
                plot_recoil(acc,
                            region_tag=region_tag,
                            distribution=distribution,
                            axis_name=distribution,
                            dataset=dataset,
                            year=year,
                            tag=tag,
                            jeteta_config=jeteta_config,
                            output_format='pdf')

    for jeteta_config in ['two_central_jets', 'two_forward_jets',
                          'one_jet_forward_one_jet_central']:
        data_mc_comparison_plot(tag,
                                distribution=distribution,
                                jeteta_config=jeteta_config,
                                output_format='pdf')

    plot_scalefactors(tag, distribution=distribution)
def main():
    inpath = sys.argv[1]
    acc = dir_archive(inpath, serialized=True, memsize=1e3, compression=0)
    acc.load('sumw')
    acc.load('sumw2')

    plot_ht_dist(acc, regex='WJetsToLNu.*(2017|2018)', tag='wjets')
    plot_ht_dist(acc, regex='DYJets.*(2017|2018)', tag='dy')
    plot_ht_dist(acc, regex='GJets_HT.*(2017)', tag='gjets_17')
    plot_ht_dist(acc, regex='GJets_DR-0p4.*(2017)', tag='gjets_dr_17')
def main():
    inpath = sys.argv[1]
    acc = dir_archive(inpath, serialized=True, compression=0, memsize=1e3)
    acc.load('sumw')
    acc.load('sumw2')

    # Create the output ROOT file to save the
    # PDF uncertainties as a function of v-pt
    outputrootpath = './output/theory_variations/rootfiles'
    if not os.path.exists(outputrootpath):
        os.makedirs(outputrootpath)

    outputrootfile_z_over_w = uproot.recreate(
        pjoin(outputrootpath, 'zoverw_pdf_unc.root'))
    outputrootfile_g_over_z = uproot.recreate(
        pjoin(outputrootpath, 'goverz_pdf_unc.root'))

    w_nom, w_unc, vpt_edges, vpt_centers = get_pdf_uncertainty(
        acc, regex='WNJetsToLNu.*', tag='wjet')
    dy_nom, dy_unc, vpt_edges, vpt_centers = get_pdf_uncertainty(
        acc, regex='DYNJetsToLL.*', tag='dy')
    gjets_nom, gjets_unc, vpt_edges, vpt_centers = get_pdf_uncertainty(
        acc, regex='G1Jet.*', tag='gjets')

    data_for_ratio = {
        'z_over_w': {
            'noms': (dy_nom, w_nom),
            'uncs': (dy_unc, w_unc),
            'rootfile': outputrootfile_z_over_w
        },
        'g_over_z': {
            'noms': (gjets_nom, dy_nom),
            'uncs': (gjets_unc, dy_unc),
            'rootfile': outputrootfile_g_over_z
        },
    }

    for tag, entry in data_for_ratio.items():
        noms = entry['noms']
        uncs = entry['uncs']
        plot_ratio(noms=noms,
                   uncs=uncs,
                   tag=tag,
                   vpt_edges=vpt_edges,
                   vpt_centers=vpt_centers,
                   outputrootfile=entry['rootfile'])
def _load_and_sum(args):
    """Merge an item from a list of coffea files and dump it to a file.

    For each file, the saved item corresponding to the same key is read out.
    The sum of the individual items for the individual files is dumped.

    :param args: Tuple (key to use, file list, output name)
    :type args: tuple
    :return: 0
    :rtype: int
    """
    # Args is a tuple for easy multiprocessing
    key, files, outname = args

    # Load the individual items
    items = []
    for fn in files:
        try:
            items.append(load(fn)[key])
        except KeyError:
            continue

    # Pairwise merging until a single item remains
    while len(items) > 1:
        x = items.pop(0)
        y = items.pop(0)
        s = x + y
        items.append(s)
    assert(len(items) == 1)

    # dump the content using klepto
    arc = dir_archive(
        outname,
        serialized=True,
        compression=0,
        memsize=1e3,
    )
    arc[key] = items[0]
    arc.dump(key)
    arc.clear()
    return 0
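Since the helper above packs its arguments into a single tuple for easy multiprocessing, a minimal fan-out sketch looks like the following; the keys, file names, and output name are illustrative.

from multiprocessing import Pool

tasks = [(key, ['part0.coffea', 'part1.coffea'], 'merged_archive')
         for key in ('sumw', 'recoil')]
with Pool(processes=2) as pool:
    pool.map(_load_and_sum, tasks)  # each call dumps one merged key to the archive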
def main(): inpath = "../../input/merged" year = 2017 mc = re.compile( f'(VDY.*HT.*|QCD.*|W.*HT.*|ST_|TTJets-FXFX_|Diboson_|GJets.*HT.*|ZJetsToNuNu.*){year}' ) signal = re.compile(f'WH.*{year}') distribution = "recoil" acc = dir_archive( inpath, serialized=True, compression=0, memsize=1e3, ) acc.load(distribution) acc.load('sumw') acc.load('sumw_pileup') acc.load('nevents') try: acc[distribution] = merge_extensions( acc[distribution], acc, reweight_pu=not ('nopu' in distribution)) scale_xs_lumi(acc[distribution]) acc[distribution] = merge_datasets(acc[distribution]) S_over_B(acc, distribution, 'sr_tight_v', mc=mc, signal=signal, unc=0.05, outname="SB_unc005.png", cutlim=(250, 750)) S_over_B(acc, distribution, 'sr_tight_v', mc=mc, signal=signal, unc=0.10, outname="SB_unc010.png", cutlim=(250, 750)) except KeyError: print("key error ") return -2
def main():
    inpath = sys.argv[1]
    acc = dir_archive(inpath)
    cfname = 'cutflow_sr_vbf'
    acc.load(cfname)
    cf = acc[cfname]

    outtag = re.findall('merged_.*', inpath)[0].replace('/', '')

    datasets = list(cf.keys())
    cuts = cf[datasets[0]].keys()

    combined_cf = Counter({cut: 0 for cut in cuts})
    for d in datasets:
        cutflow = Counter(cf[d])
        combined_cf += cutflow

    pcutflow = []
    for idx, (c, v) in enumerate(combined_cf.items()):
        if idx == 0:
            acc = 100
        else:
            acc = v / list(combined_cf.values())[idx - 1] * 100
        pcutflow.append([c, v, acc])

    outdir = f'./output/{outtag}'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    outpath = pjoin(outdir, 'cutflow.txt')
    with open(outpath, 'w+') as f:
        f.write(outtag)
        f.write('\n')
        f.write(
            tabulate(pcutflow,
                     headers=['Cut', 'Number of Events', 'Acceptance (%)'],
                     floatfmt=[".0f", ".0f", ".3f"]))

    print(f'File saved: {outpath}')
def main():
    args = parse_cli()

    # Path to the directory containing list of ROOT input files (R&S trees)
    inpath = args.inpath

    acc = dir_archive(inpath)
    acc.load('sumw')
    acc.load('sumw2')

    try:
        # re.findall returns a list, so a missing match raises IndexError
        outtag = re.findall('merged_.*', inpath)[0]
    except IndexError:
        raise RuntimeError(f'Check the naming of input: {os.path.basename(inpath)}')

    outdir = f'./output/{outtag}'
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    distributions = BINNINGS.keys()

    regions = [
        'inclusive',
        'sr_vbf',
        'cr_vbf_qcd'
    ]

    for region in regions:
        if not re.match(args.region, region):
            continue
        for distribution in distributions:
            if not re.match(args.distribution, distribution):
                continue
            make_plot(acc,
                      outdir=outdir,
                      distribution=distribution,
                      region=region,
                      dataset='JetHT',
                      years=args.years
                      )
def main():
    overwrite = True

    # load the config and the cache
    cfg = loadConfig()

    # Inputs are defined in a dictionary
    # dataset : list of files
    fileset = {
        'tW_scattering': glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/tW_scattering__nanoAOD/merged/*.root"),
        "TTW": glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/TTWJetsToLNu_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20_ext1-v1/merged/*.root") \
             + glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p2/TTWJetsToQQ_TuneCP5_13TeV-amcatnloFXFX-madspin-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/merged/*.root"),
        # "ttbar": glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p3/TTJets_SingleLeptFromT_TuneCP5_13TeV-madgraphMLM-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/*.root")  # adding this is still surprisingly fast (20GB file!)
        "ttbar": glob.glob("/hadoop/cms/store/user/dspitzba/nanoAOD/ttw_samples/0p1p3/TTJets_SingleLeptFromTbar_TuneCP5_13TeV-madgraphMLM-pythia8__RunIIAutumn18NanoAODv6-Nano25Oct2019_102X_upgrade2018_realistic_v20-v1/merged/*.root")
    }

    # histograms
    histograms = ["MET_pt", "Jet_pt", "Jet_eta", "Jet_pt_fwd", "W_pt_notFromTop",
                  "GenJet_pt_fwd", "Spectator_pt", "Spectator_eta"]
    histograms += ["Top_pt", "Top_eta", "Antitop_pt", "Antitop_eta", "W_pt",
                   "W_eta", "N_b", "N_jet", "dijet_mass", "dijet_mass_bestW",
                   "dijet_mass_secondW", "digenjet_mass", "dijet_deltaR"]

    # initialize cache
    cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']),
                                     cfg['caches']['simpleProcessor']),
                        serialized=True)
    if not overwrite:
        cache.load()

    if cfg == cache.get('cfg') and histograms == cache.get('histograms') and \
       fileset == cache.get('fileset') and cache.get('simple_output'):
        output = cache.get('simple_output')
    else:
        # Run the processor
        output = processor.run_uproot_job(
            fileset,
            treename='Events',
            processor_instance=exampleProcessor(),
            executor=processor.futures_executor,
            executor_args={'workers': 1, 'function_args': {'flatten': False}},
            chunksize=500000,
        )
        cache['fileset'] = fileset
        cache['cfg'] = cfg
        cache['histograms'] = histograms
        cache['simple_output'] = output
        cache.dump()

    # Make a few plots
    outdir = "./tmp_plots"
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    for name in histograms:
        print(name)
        histogram = output[name]
        if name == 'MET_pt':
            # rebin
            new_met_bins = hist.Bin('pt', r'$E_T^{miss} \ (GeV)$', 20, 0, 200)
            histogram = histogram.rebin('pt', new_met_bins)
        if name == 'W_pt_notFromTop':
            # rebin
            new_pt_bins = hist.Bin('pt', r'$p_{T}(W) \ (GeV)$', 25, 0, 500)
            histogram = histogram.rebin('pt', new_pt_bins)

        ax = hist.plot1d(histogram, overlay="dataset", density=False, stack=True)
        ax.set_yscale('linear')  # can be log
        #ax.set_ylim(0,0.1)
        ax.figure.savefig(os.path.join(outdir, "{}.pdf".format(name)))
        ax.clear()

        # make density plots because we don't care about x-sec differences
        ax = hist.plot1d(histogram, overlay="dataset", density=True, stack=False)
        ax.set_yscale('linear')  # can be log
        #ax.set_ylim(0,0.1)
        ax.figure.savefig(os.path.join(outdir, "{}_shape.pdf".format(name)))
        ax.clear()

    return output
def plot(inpath):
    indir = os.path.abspath(inpath)

    # The processor output is stored in an 'accumulator', which in our case is
    # just a dictionary holding all the histograms. Put all your *.coffea files
    # into 'indir' and pass the directory as an argument here. All input files
    # in the directory will automatically be found, merged and read. The
    # merging only happens the first time you run over a specific set of inputs.
    acc = dir_archive(inpath, serialized=True, compression=0, memsize=1e3)

    # Get a settings dictionary that details which plots to make for each
    # region, what the axis limits are, etc. Plots can be added by extending
    # the dictionary, and axis ranges can be modified there as well.
    settings = plot_settings()

    merged = set()

    # Separate plots per year
    for year in [2017, 2018]:
        # The data to be used for each region: muon regions use MET,
        # electron+photon regions use EGamma
        # (EGamma = SingleElectron+SinglePhoton for 2017)
        data = {
            'sr_vbf': None,
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}',
        }

        # Same for the MC selection: match datasets by regular expressions.
        # Here for LO V samples (HT binned)
        mc_lo = {
            'sr_vbf': re.compile(f'(ZJetsToNuNu.*|EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'),
            'cr_1m_vbf': re.compile(f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'),
            'cr_1e_vbf': re.compile(f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'),
            'cr_2m_vbf': re.compile(f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'),
            'cr_2e_vbf': re.compile(f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'),
            'cr_g_vbf': re.compile(f'(GJets_(HT|SM).*|QCD_HT.*|WJetsToLNu.*HT.*).*{year}'),
        }

        # Want to compare LO and NLO, so do the same thing for NLO V samples.
        # All non-V samples remain the same.
        mc_nlo = {
            'sr_vbf': re.compile(f'(ZJetsToNuNu.*|EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*FXFX.*).*{year}'),
            'cr_1m_vbf': re.compile(f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*|.*WJetsToLNu.*FXFX.*).*{year}'),
            'cr_1e_vbf': re.compile(f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*|.*WJetsToLNu.*FXFX.*).*{year}'),
            'cr_2m_vbf': re.compile(f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*).*{year}'),
            'cr_2e_vbf': re.compile(f'(EW.*|TTJets.*FXFX.*|Diboson.*|ST.*|QCD_HT.*|.*DY.*FXFX.*).*{year}'),
            'cr_g_vbf': re.compile(f'(GJets_(HT|SM).*|QCD_HT.*|W.*FXFX.*).*{year}'),
        }

        regions = list(mc_lo.keys())
        # Remove signal region, no need in ratio plots
        regions.remove('sr_vbf')

        # Make control region ratio plots for both LO and NLO.
        # Can be skipped if you only want data / MC agreement plots.
        outdir = f'./output/{os.path.basename(indir)}/ratios'

        # Load ingredients from cache
        acc.load('mjj')
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')

        cr_ratio_plot(acc, year=year, tag='losf', outdir=outdir, mc=mc_lo,
                      regions=regions, distribution='mjj')
        cr_ratio_plot(acc, year=year, tag='nlo', outdir=outdir, mc=mc_nlo,
                      regions=regions, distribution='mjj')

        # Data / MC plots are made here. Loop over all regions
        for region in mc_lo.keys():
            ratio = True if region != 'sr_vbf' else False

            # Make a separate output directory for each region
            outdir = f'./output/{os.path.basename(indir)}/{region}'

            # Settings for this region
            plotset = settings[region]

            # Loop over the distributions
            for distribution in plotset.keys():
                # Load from cache
                if not distribution in merged:
                    acc.load(distribution)
                    if not distribution in acc.keys():
                        print(f"WARNING: Distribution {distribution} not found in input files.")
                        continue
                    acc[distribution] = merge_extensions(
                        acc[distribution], acc,
                        reweight_pu=not ('nopu' in distribution))
                    scale_xs_lumi(acc[distribution])
                    acc[distribution] = merge_datasets(acc[distribution])
                    acc[distribution].axis('dataset').sorting = 'integral'
                    merged.add(distribution)

                try:
                    # The heavy lifting of making a plot is hidden in
                    # make_plot. We call it once using the LO MC
                    make_plot(acc,
                              region=region,
                              distribution=distribution,
                              year=year,
                              data=data[region],
                              mc=mc_lo[region],
                              ylim=plotset[distribution].get('ylim', None),
                              xlim=plotset[distribution].get('xlim', None),
                              tag='losf',
                              outdir=f'./output/{os.path.basename(indir)}/{region}',
                              output_format='pdf',
                              ratio=ratio)
                    # And then we also call it for the NLO MC. The output files
                    # will be named according to the 'tag' argument, so we will
                    # be able to tell them apart.
                    make_plot(acc,
                              region=region,
                              distribution=distribution,
                              year=year,
                              data=data[region],
                              mc=mc_nlo[region],
                              ylim=plotset[distribution].get('ylim', None),
                              xlim=plotset[distribution].get('xlim', None),
                              tag='nlo',
                              outdir=f'./output/{os.path.basename(indir)}/{region}',
                              output_format='pdf',
                              ratio=ratio)
                except KeyError:
                    continue
def plot(args):
    indir = os.path.abspath(args.inpath)

    # The processor output is stored in an 'accumulator', which in our case is
    # just a dictionary holding all the histograms. Put all your *.coffea files
    # into 'indir' and pass the directory as an argument here. All input files
    # in the directory will automatically be found, merged and read. The
    # merging only happens the first time you run over a specific set of inputs.
    acc = dir_archive(args.inpath, serialized=True, compression=0, memsize=1e3)

    # Get a settings dictionary that details which plots to make for each
    # region, what the axis limits are, etc. Plots can be added by extending
    # the dictionary, and axis ranges can be modified there as well.
    settings = plot_settings()

    merged = set()

    # Separate plots per year
    for year in [2017, 2018]:
        # The data to be used for each region: muon regions use MET,
        # electron+photon regions use EGamma
        # (EGamma = SingleElectron+SinglePhoton for 2017)
        data = {
            'sr_vbf': f'MET_{year}',
            'cr_1m_vbf': f'MET_{year}',
            'cr_2m_vbf': f'MET_{year}',
            'cr_1e_vbf': f'EGamma_{year}',
            'cr_2e_vbf': f'EGamma_{year}',
            'cr_g_vbf': f'EGamma_{year}',
        }

        # Same for the MC selection: match datasets by regular expressions.
        # Here for LO V samples (HT binned)
        mc_lo = {
            'sr_vbf': re.compile(f'(ZJetsToNuNu.*|EW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'),
            'cr_1m_vbf': re.compile(f'(EWKW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'),
            'cr_1e_vbf': re.compile(f'(EWKW.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*|.*WJetsToLNu.*HT.*).*{year}'),
            'cr_2m_vbf': re.compile(f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'),
            'cr_2e_vbf': re.compile(f'(EWKZ.*ZToLL.*|Top_FXFX.*|Diboson.*|.*DYJetsToLL_M-50_HT_MLM.*).*{year}'),
            'cr_g_vbf': re.compile(f'(GJets_(DR-0p4|SM).*|QCD_data.*|WJetsToLNu.*HT.*).*{year}'),
        }

        # Load ingredients from cache
        acc.load('sumw')
        acc.load('sumw_pileup')
        acc.load('nevents')

        # Data / MC plots are made here. Loop over all regions
        for region in mc_lo.keys():
            if not re.match(args.region, region):
                continue

            # Plot ratio pads for all regions (now that we're unblinded)
            ratio = True

            # Make a separate output directory for each region
            outdir = f'./output/{os.path.basename(indir)}/{region}'

            # Settings for this region
            plotset = settings[region]

            # Loop over the distributions
            for distribution in plotset.keys():
                if not re.match(args.distribution, distribution):
                    continue

                # Load from cache
                if not distribution in merged:
                    acc.load(distribution)
                    if not distribution in acc.keys():
                        print(f"WARNING: Distribution {distribution} not found in input files.")
                        continue
                    acc[distribution] = merge_extensions(
                        acc[distribution], acc,
                        reweight_pu=not ('nopu' in distribution))
                    scale_xs_lumi(acc[distribution])
                    acc[distribution] = merge_datasets(acc[distribution])
                    acc[distribution].axis('dataset').sorting = 'integral'
                    merged.add(distribution)

                try:
                    # The heavy lifting of making a plot is hidden in
                    # make_plot. We call it once using the LO MC
                    imc = mc_lo[region]
                    if "cr_g" in region and distribution != "recoil":
                        imc = re.compile(imc.pattern.replace('QCD_data', 'QCD.*HT'))
                    make_plot(acc,
                              region=region,
                              distribution=distribution,
                              year=year,
                              data=data[region],
                              mc=imc,
                              ylim=plotset[distribution].get('ylim', None),
                              xlim=plotset[distribution].get('xlim', None),
                              tag='losf',
                              outdir=f'./output/{os.path.basename(indir)}/{region}',
                              output_format='pdf',
                              ratio=ratio)
                except KeyError:
                    continue
if __name__ == '__main__':

    from klepto.archives import dir_archive
    from Tools.samples import *  # fileset_2018 #, fileset_2018_small
    from processor.default_accumulators import *

    overwrite = True
    small = False
    save = True

    # load the config and the cache
    cfg = loadConfig()

    cacheName = 'SS_analysis'
    if small:
        cacheName += '_small'
    cache = dir_archive(os.path.join(os.path.expandvars(cfg['caches']['base']),
                                     cacheName),
                        serialized=True)

    year = 2018

    fileset = {
        'topW_v3': fileset_2018['topW_v3'],
        'topW_EFT_cp8': fileset_2018['topW_EFT_cp8'],
        'topW_EFT_mix': fileset_2018['topW_EFT_mix'],
        'TTW': fileset_2018['TTW'],
        'TTZ': fileset_2018['TTZ'],
        'TTH': fileset_2018['TTH'],
        'diboson': fileset_2018['diboson'],
        'triboson': fileset_2018['triboson'],
        #'wpwp': fileset_2018['wpwp'],
        'TTTT': fileset_2018['TTTT'],
        'ttbar': fileset_2018['ttbar'],
sprayer = BuckshotSolver
seeker = PowellDirectionalSolver
npts = 25   # number of solvers
retry = 1   # max consecutive iteration retries without a cache 'miss'
tol = 8     # rounding precision
mem = 1     # cache rounding precision

#CUTE: 'configure' monitor and archive if they are desired
if stepmon:
    stepmon = LoggingMonitor(1)  # monitor for all runs
    itermon = LoggingMonitor(1, filename='inv.txt')  #XXX: log.txt?
else:
    stepmon = itermon = None
if archive: #python2.5
    ar_name = '__%s_%sD_cache__' % (model.__self__.__class__.__name__, ndim)
    archive = dir_archive(ar_name, serialized=True, cached=False)
    ar_name = '__%s_%sD_invcache__' % (model.__self__.__class__.__name__, ndim)
    ivcache = dir_archive(ar_name, serialized=True, cached=False)
else:
    archive = ivcache = None

from mystic.search import Searcher  #XXX: init w/ archive, then UseArchive?
sampler = Searcher(npts, retry, tol, mem, _map, archive, sprayer, seeker)
sampler.Verbose(disp)
sampler.UseTrajectories(traj)

### doit ###
maxpts = 1000.  #10000.
surface = Surface(model, sampler, maxpts=maxpts, dim=ndim)
surface.UseMonitor(stepmon, itermon)
surface.UseArchive(archive, ivcache)
def main():
    # set to True if you want to update the mistag root file,
    # otherwise just make the plots
    if True:
        outfile = ROOT.TFile.Open(outfilename, 'recreate')
    else:
        outfile = None

    # Prepare the acc
    acc = dir_archive(
        inpath,
        serialized=True,
        compression=0,
        memsize=1e3,
    )
    acc.load('sumw')
    acc.load('sumw_pileup')
    acc.load('nevents')

    distribution = 'ak8_pt0'
    distribution_Vmatched = 'ak8_Vmatched_pt0'
    distribution_mass = 'ak8_mass0'
    acc.load(distribution)
    acc.load(distribution_Vmatched)
    acc.load(distribution_mass)

    # merge datasets and scale with lumi * xs
    htmp = acc[distribution]
    htmp_Vmatched = acc[distribution_Vmatched]
    htmp_mass = acc[distribution_mass]

    htmp = merge_extensions(htmp, acc, reweight_pu=True)
    scale_xs_lumi(htmp)
    htmp = merge_datasets(htmp)
    acc[distribution] = htmp

    htmp_Vmatched = merge_extensions(htmp_Vmatched, acc, reweight_pu=True)
    scale_xs_lumi(htmp_Vmatched)
    htmp_Vmatched = merge_datasets(htmp_Vmatched)
    acc[distribution_Vmatched] = htmp_Vmatched

    htmp_mass = merge_extensions(htmp_mass, acc, reweight_pu=True)
    scale_xs_lumi(htmp_mass)
    htmp_mass = merge_datasets(htmp_mass)
    acc[distribution_mass] = htmp_mass

    acc[distribution].axis('dataset').sorting = 'integral'
    acc[distribution_Vmatched].axis('dataset').sorting = 'integral'
    acc[distribution_mass].axis('dataset').sorting = 'integral'

    # binning stuff
    if newbin:
        htmp = htmp.rebin(htmp.axis('jetpt'), newbin)
        htmp_Vmatched = htmp_Vmatched.rebin(htmp_Vmatched.axis('jetpt'), newbin)
    edges = htmp.axis('jetpt').edges()
    centers = htmp.axis('jetpt').centers()
    halfwidth = [centers[i] - edges[i] for i in range(len(centers))]

    for lepton_flag in ['1m', '2m', '1e', '2e', 'g']:
    #for lepton_flag in ['g']:
        for year in [2017, 2018]:
            mc_map = {
                'cr_1m_v': re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT).*{year}'),
                'cr_1e_v': re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT).*{year}'),
                'cr_2m_v': re.compile(f'(Top.*FXFX|Diboson|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_2e_v': re.compile(f'(Top.*FXFX|Diboson|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_g_v': re.compile(f'(Diboson|QCD_HT|GJets_DR.*HT|VQQGamma_FXFX|WJetsToLNu.*HT).*{year}'),
                'cr_nobveto_v': re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
                'sr_v': re.compile(f'(Top.*FXFX|Diboson|QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
            }
            mc_map_noV = {
                'cr_1m_v': re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT).*{year}'),
                'cr_1e_v': re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT).*{year}'),
                'cr_2m_v': re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_2e_v': re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM).*{year}'),
                'cr_g_v': re.compile(f'(QCD_HT|GJets_DR.*HT|WJetsToLNu.*HT).*{year}'),
                'cr_nobveto_v': re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
                'sr_v': re.compile(f'(QCD_HT|DYJetsToLL_M-50_HT_MLM|WJetsToLNu.*HT|GJets_DR.*HT|ZJetsToNuNu).*{year}'),
            }
            mc_map_realV = {
                'cr_1m_v': re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_1e_v': re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_2m_v': re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_2e_v': re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'cr_g_v': re.compile(f'(Diboson|VQQGamma_FXFX).*{year}'),
                'cr_nobveto_v': re.compile(f'(Top.*FXFX|Diboson).*{year}'),
                'sr_v': re.compile(f'(Top.*FXFX|Diboson).*{year}'),
            }

            # use NLO GJets for the measurement if needed
            if nlogjet:
                mc_map['cr_g_v'] = re.compile(f'(Diboson|QCD_HT|GJets_1j|VQQGamma_FXFX|WJetsToLNu.*HT).*{year}')
                mc_map_noV['cr_g_v'] = re.compile(f'(QCD_HT|GJets_1j|WJetsToLNu.*HT).*{year}')

            for wp in ['loose', 'tight', 'medium']:
                region_all = f'cr_{lepton_flag}_hasmass_inclusive_v'
                region_all_nomass = f'cr_{lepton_flag}_inclusive_v'
                region_pass = f'cr_{lepton_flag}_nomistag_{wp}_v'
                region_pass_nomass = f'cr_{lepton_flag}_nomistag_nomass_{wp}_v'
                mc_All = mc_map[f'cr_{lepton_flag}_v']
                mc_False = mc_map_noV[f'cr_{lepton_flag}_v']
                mc_Real = mc_map_realV[f'cr_{lepton_flag}_v']
                if lepton_flag in ['1e', '2e', 'g']:
                    data = re.compile(f'EGamma_{year}')
                else:
                    data = re.compile(f'MET_{year}')

                ### DEBUG ###
                # print(acc[distribution][mc_All].integrate("region", region_all).values())
                # print(acc[distribution][mc_All].integrate("region", region_pass).values())
                # print(acc[distribution_Vmatched][mc_All].integrate("region", region_pass).values())
                #############

                # make stack plots for all and pass
                try:
                    acc["alskjxkjo"]  # deliberately raises KeyError, skipping the plots below
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=data, mc=mc_All,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3))
                    make_plot(acc, region=region_all_nomass, distribution=distribution, year=year, data=data, mc=mc_All,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3))
                    make_plot(acc, region=region_all_nomass, distribution=distribution_mass, year=year, data=data, mc=mc_All,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3))
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=None, mc=mc_Real,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_all, distribution=distribution_Vmatched, year=year, data=None, mc=mc_Real,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=None, mc=mc_False,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3), ratio=False, tag="MCNoV")
                    make_plot(acc, region=region_all, distribution=distribution, year=year, data=data, mc=None,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3), ratio=False, tag="data")
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=data, mc=mc_All,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3))
                    make_plot(acc, region=region_pass_nomass, distribution=distribution, year=year, data=data, mc=mc_All,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3))
                    make_plot(acc, region=region_pass_nomass, distribution=distribution_mass, year=year, data=data, mc=mc_All,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3))
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=None, mc=mc_Real,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_pass, distribution=distribution_Vmatched, year=year, data=None, mc=mc_Real,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3), ratio=False, tag="MCHasV")
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=None, mc=mc_False,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3), ratio=False, tag="MCNoV")
                    make_plot(acc, region=region_pass, distribution=distribution, year=year, data=data, mc=None,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3), ratio=False, tag="data")
                except ValueError:
                    print(f"Warning(ValueError): skipping plots for lepton_flag={lepton_flag} year={year} wp={wp} due to negative or zero bins")
                except AssertionError:
                    print(f"Warning(AssertionError): skipping plots for lepton_flag={lepton_flag} year={year} wp={wp} due to negative or zero bins")
                except KeyError:
                    print(f"Warning(KeyError): skipping plots for lepton_flag={lepton_flag} year={year} wp={wp} due to negative or zero bins")

                try:
                    make_plot(acc, region=region_pass_nomass, distribution=distribution, year=year, data=data, mc=mc_All,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3))
                    make_plot(acc, region=region_pass_nomass, distribution=distribution_mass, year=year, data=data, mc=mc_All,
                              outdir=f'{outdir}/stack_plots', output_format='png', ylim=(10e-4, 5e3))
                except:
                    pass

                # extract mistag rate for data and mc
                selector_region_all, selector_region_pass = region_all, region_pass
                if not massden:
                    selector_region_all = region_all_nomass
                if not massnum:
                    selector_region_pass = region_pass_nomass

                for sysvar in all_sysvar:
                    if sysvar == "nominal":
                        sysvar_tag = ""
                    else:
                        sysvar_tag = "_" + sysvar

                    # background subtraction from data: remove real Vs
                    h_data = htmp[data].integrate('dataset')
                    #h_mc_Real = htmp[mc_Real].integrate('dataset')
                    h_mc_False = htmp[mc_False].integrate('dataset')
                    h_mc_Real = htmp_Vmatched[mc_Real]

                    # vary within systematics:
                    # norm 10% for both, bveto unc 6% for top and 2% for diboson,
                    # vtag unc 10% for both (approx)
                    if sysvar == "sysUp": h_mc_Real.scale(1.15)
                    if sysvar == "sysDn": h_mc_Real.scale(0.85)
                    if sysvar == "topNormUp": h_mc_Real.scale({"Top_FXFX_2017": 1.10, "Top_FXFX_2018": 1.10}, axis="dataset")
                    if sysvar == "topNormDn": h_mc_Real.scale({"Top_FXFX_2017": 0.90, "Top_FXFX_2018": 0.90}, axis="dataset")
                    if sysvar == "vvNormUp": h_mc_Real.scale({"Diboson_2017": 1.10, "Diboson_2018": 1.10}, axis="dataset")
                    if sysvar == "vvNormDn": h_mc_Real.scale({"Diboson_2017": 0.90, "Diboson_2018": 0.90}, axis="dataset")
                    if sysvar == "vgNormUp": h_mc_Real.scale({"VQQGamma_FXFX_2017": 1.10, "VQQGamma_FXFX_2018": 1.10}, axis="dataset")
                    if sysvar == "vgNormDn": h_mc_Real.scale({"VQQGamma_FXFX_2017": 0.90, "VQQGamma_FXFX_2018": 0.90}, axis="dataset")
                    if sysvar == "topVTagUp": h_mc_Real.scale({"Top_FXFX_2017": 1.10, "Top_FXFX_2018": 1.10}, axis="dataset")
                    if sysvar == "topVTagDn": h_mc_Real.scale({"Top_FXFX_2017": 0.90, "Top_FXFX_2018": 0.90}, axis="dataset")
                    if sysvar == "vvVTagUp": h_mc_Real.scale({"Diboson_2017": 1.10, "Diboson_2018": 1.10}, axis="dataset")
                    if sysvar == "vvVTagDn": h_mc_Real.scale({"Diboson_2017": 0.90, "Diboson_2018": 0.90}, axis="dataset")
                    if sysvar == "vgVTagUp": h_mc_Real.scale({"VQQGamma_FXFX_2017": 1.10, "VQQGamma_FXFX_2018": 1.10}, axis="dataset")
                    if sysvar == "vgVTagDn": h_mc_Real.scale({"VQQGamma_FXFX_2017": 0.90, "VQQGamma_FXFX_2018": 0.90}, axis="dataset")
                    if sysvar == "topBVetoUp": h_mc_Real.scale({"Top_FXFX_2017": 1.06, "Top_FXFX_2018": 1.06}, axis="dataset")
                    if sysvar == "topBVetoDn": h_mc_Real.scale({"Top_FXFX_2017": 0.94, "Top_FXFX_2018": 0.94}, axis="dataset")
                    if sysvar == "vvBVetoUp": h_mc_Real.scale({"Diboson_2017": 1.02, "Diboson_2018": 1.02}, axis="dataset")
                    if sysvar == "vvBVetoDn": h_mc_Real.scale({"Diboson_2017": 0.98, "Diboson_2018": 0.98}, axis="dataset")
                    if sysvar == "vgBVetoUp": h_mc_Real.scale({"VQQGamma_FXFX_2017": 1.02, "VQQGamma_FXFX_2018": 1.02}, axis="dataset")
                    if sysvar == "vgBVetoDn": h_mc_Real.scale({"VQQGamma_FXFX_2017": 0.98, "VQQGamma_FXFX_2018": 0.98}, axis="dataset")

                    h_mc_Real = h_mc_Real.integrate('dataset')
                    h_mc_Real.scale(-1 * realVSF)  # just for background subtraction
                    h_data.add(h_mc_Real)

                    teff_mistag_rate_data = get_mistag_rate(h_data, selector_region_all, selector_region_pass,
                                                            flag=f'data_{lepton_flag}_{wp}_{year}{sysvar_tag}', isData=True)
                    teff_mistag_rate_data.SetNameTitle(f'mistag_rate_data_{lepton_flag}_{wp}_{year}{sysvar_tag}', 'mistagging rate')
                    teff_mistag_rate_mc = get_mistag_rate(h_mc_False, selector_region_all, selector_region_pass,
                                                          flag=f'mc_{lepton_flag}_{wp}_{year}{sysvar_tag}', isData=False)
                    teff_mistag_rate_mc.SetNameTitle(f'mistag_rate_mc_{lepton_flag}_{wp}_{year}{sysvar_tag}', 'mistagging rate')

                    # get the scale factors
                    # note that it's impossible to divide two TEfficiency in ROOT, have to do that manually
                    th1_mistag_SF = ratio_of_efficiencies(f'mistag_SF_{lepton_flag}_{wp}_{year}{sysvar_tag}', 'mistag scale factor',
                                                          teff_mistag_rate_data, teff_mistag_rate_mc)

                    # save the mistag rate and SF histograms into the root file
                    if outfile:
                        teff_mistag_rate_data.Write()
                        teff_mistag_rate_mc.Write()
                        th1_mistag_SF.Write()

    # soup together all CRs using a weighted average between the regions:
    for year in [2017, 2018]:
        for wp in ['loose', 'tight', 'medium']:
            for sysvar in all_sysvar:
                if sysvar == "nominal":
                    sysvar_tag = ""
                else:
                    sysvar_tag = "_" + sysvar

                teff_mistag_rate_data_1e = outfile.Get(f'mistag_rate_data_1e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_2e = outfile.Get(f'mistag_rate_data_2e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_1m = outfile.Get(f'mistag_rate_data_1m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_2m = outfile.Get(f'mistag_rate_data_2m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_data_g = outfile.Get(f'mistag_rate_data_g_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_1e = outfile.Get(f'mistag_rate_mc_1e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_2e = outfile.Get(f'mistag_rate_mc_2e_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_1m = outfile.Get(f'mistag_rate_mc_1m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_2m = outfile.Get(f'mistag_rate_mc_2m_{wp}_{year}{sysvar_tag}')
                teff_mistag_rate_mc_g = outfile.Get(f'mistag_rate_mc_g_{wp}_{year}{sysvar_tag}')

                # souped SF for all W/Z regions
                teff_mistag_rate_data_wz = (teff_mistag_rate_data_1e + teff_mistag_rate_data_2e
                                            + teff_mistag_rate_data_1m + teff_mistag_rate_data_2m)
                teff_mistag_rate_data_wz.SetNameTitle(f'mistag_rate_data_wz_{wp}_{year}{sysvar_tag}',
                                                      'souped mistagging rate for W and Z')
                teff_mistag_rate_mc_wz = (teff_mistag_rate_mc_1e + teff_mistag_rate_mc_2e
                                          + teff_mistag_rate_mc_1m + teff_mistag_rate_mc_2m)
                teff_mistag_rate_mc_wz.SetNameTitle(f'mistag_rate_mc_wz_{wp}_{year}{sysvar_tag}',
                                                    'souped mistagging rate for W and Z')
                th1_mistag_SF_wz = ratio_of_efficiencies(f'mistag_SF_wz_{wp}_{year}{sysvar_tag}',
                                                         'souped mistag scale factor for W and Z',
                                                         teff_mistag_rate_data_wz, teff_mistag_rate_mc_wz)

                # souped SF for all W regions
                teff_mistag_rate_data_w = teff_mistag_rate_data_1e + teff_mistag_rate_data_1m
                teff_mistag_rate_data_w.SetNameTitle(f'mistag_rate_data_w_{wp}_{year}{sysvar_tag}',
                                                     'souped mistagging rate for W')
                teff_mistag_rate_mc_w = teff_mistag_rate_mc_1e + teff_mistag_rate_mc_1m
                teff_mistag_rate_mc_w.SetNameTitle(f'mistag_rate_mc_w_{wp}_{year}{sysvar_tag}',
                                                   'souped mistagging rate for W')
                th1_mistag_SF_w = ratio_of_efficiencies(f'mistag_SF_w_{wp}_{year}{sysvar_tag}',
                                                        'souped mistag scale factor for W',
                                                        teff_mistag_rate_data_w, teff_mistag_rate_mc_w)

                # souped SF for all Z regions
                teff_mistag_rate_data_z = teff_mistag_rate_data_2e + teff_mistag_rate_data_2m
                teff_mistag_rate_data_z.SetNameTitle(f'mistag_rate_data_z_{wp}_{year}{sysvar_tag}',
                                                     'souped mistagging rate for Z')
                teff_mistag_rate_mc_z = teff_mistag_rate_mc_2e + teff_mistag_rate_mc_2m
                teff_mistag_rate_mc_z.SetNameTitle(f'mistag_rate_mc_z_{wp}_{year}{sysvar_tag}',
                                                   'souped mistagging rate for Z')
                th1_mistag_SF_z = ratio_of_efficiencies(f'mistag_SF_z_{wp}_{year}{sysvar_tag}',
                                                        'souped mistag scale factor for Z',
                                                        teff_mistag_rate_data_z, teff_mistag_rate_mc_z)

                # souped SF for all regions including photon
                teff_mistag_rate_data_all = (teff_mistag_rate_data_1e + teff_mistag_rate_data_2e
                                             + teff_mistag_rate_data_1m + teff_mistag_rate_data_2m
                                             + teff_mistag_rate_data_g)
                teff_mistag_rate_data_all.SetNameTitle(f'mistag_rate_data_all_{wp}_{year}{sysvar_tag}',
                                                       'souped mistagging rate for all')
                teff_mistag_rate_mc_all = (teff_mistag_rate_mc_1e + teff_mistag_rate_mc_2e
                                           + teff_mistag_rate_mc_1m + teff_mistag_rate_mc_2m
                                           + teff_mistag_rate_mc_g)
                teff_mistag_rate_mc_all.SetNameTitle(f'mistag_rate_mc_all_{wp}_{year}{sysvar_tag}',
                                                     'souped mistagging rate for all')
                th1_mistag_SF_all = ratio_of_efficiencies(f'mistag_SF_all_{wp}_{year}{sysvar_tag}',
                                                          'souped mistag scale factor for all',
                                                          teff_mistag_rate_data_all, teff_mistag_rate_mc_all)

                if outfile:
                    teff_mistag_rate_data_wz.Write()
                    teff_mistag_rate_mc_wz.Write()
                    th1_mistag_SF_wz.Write()
                    teff_mistag_rate_data_w.Write()
                    teff_mistag_rate_mc_w.Write()
                    th1_mistag_SF_w.Write()
                    teff_mistag_rate_data_z.Write()
                    teff_mistag_rate_mc_z.Write()
                    th1_mistag_SF_z.Write()
                    teff_mistag_rate_data_all.Write()
                    teff_mistag_rate_mc_all.Write()
                    th1_mistag_SF_all.Write()

    if outfile:
        outfile.Close()
#!/usr/bin/env python
#
# Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
# Copyright (c) 2013-2015 California Institute of Technology.
# License: 3-clause BSD.  The full license text is available at:
#  - http://trac.mystic.cacr.caltech.edu/project/pathos/browser/klepto/LICENSE

from klepto.archives import dir_archive
from pox import rmtree

# start fresh
rmtree('foo', ignore_errors=True)

d = dir_archive('foo', cached=False)
key = '1234TESTMETESTMETESTME1234'
d._mkdir(key)
#XXX: repeat mkdir does nothing, should it clear? I think not.
_dir = d._mkdir(key)
assert d._getdir(key) == _dir
d._rmdir(key)

# with _pickle
x = [1,2,3,4,5]
d._fast = True
d[key] = x
assert d[key] == x
d._rmdir(key)

# with dill
d._fast = False