Exemplo n.º 1
0
    return r


#Fit result files as (cut, channel, path) triples; every path is rooted at
#the directory named by the STPOL_DIR environment variable.
_stpol_dir = os.environ["STPOL_DIR"]
fit_files = [
    (
        'final_2j1t_mva',
        'ele',
        _stpol_dir + '/final_fit/results/ele__mva_BDT_with_top_mass_C_eta_lj_el_pt_mt_el_pt_bj_mass_bj_met_mass_lj.txt',
    ),
    (
        'final_2j1t_mva',
        'mu',
        _stpol_dir + '/final_fit/results/mu__mva_BDT_with_top_mass_eta_lj_C_mu_pt_mt_mu_met_mass_bj_pt_bj_mass_lj.txt',
    ),
]


#Parsed fit results, indexed as [cut][channel], kept in two matching formats
fitpars = NestedDict()
fitpars_process = NestedDict()

#Read every fit result file once per format: first by sample, then by process
for (cut, chan, inf) in fit_files:
    for _store, _fmt in ((fitpars, 'sample'), (fitpars_process, 'process')):
        _store[cut][chan] = from_file(inf, match_fmt=_fmt)

#Separate SF-s for MET-variated templates, probably no longer needed.
names = ["t-channel", "top+QCD", "W, diboson"]

#One muon-channel fit result per MET cut value; the value is used as text in
#both the dictionary key and the directory name.
for met in ["30", "50", "70"]:
    fitpars['final_2j1t_mva_met'+met]['mu'] = from_file(
        os.environ["STPOL_DIR"]
        + "/final_fit/results/histos/met%s/mu__mva_BDT_with_top_mass_eta_lj_C_mu_pt_mt_mu_met_mass_bj_pt_bj_mass_lj.txt" % met
    )
    # FIXME -> Currently no MET cut dependent results for electron channel
Exemplo n.º 2
0
def combine_templates(templates, patterns, conf):
    """
    Build the systematic template files for one variable from matched histograms.

    The function
      1. collects the data and MC histograms matched by ``patterns``,
      2. derives the data-driven QCD templates with their yield and isolation
         variations from the anti-isolated data,
      3. sorts every histogram into a per-sample systematic scenario,
      4. writes all scenarios to the unmerged output file, adding placeholder
         histograms for variations that an MC sample does not have,
      5. merges the samples into physics processes and writes the merged
         output file,
      6. reloads the merged file and checks the set of processes in it.

    Args:
        templates: object indexed as ``templates[pattern]``; each lookup must
            return a sized iterable of ``(keys, histogram)`` pairs.
        patterns: mapping with the keys "data", "mc_nom", "mc_varsamp",
            "mc_varproc" and "data_antiiso".
        conf: configuration object providing ``get_nominal_weight()``,
            ``get_outfile_unmerged()``, ``get_outfile_merged()``, ``varname``
            and ``channel``.

    Raises:
        MatchingException: if any of the patterns matches no histogram.
        ValueError: if no histograms were collected at all, or an anti-iso
            template carries an unknown isolation variation direction.
        Exception: if a QCD template key cannot be parsed, a sample has
            neither a nominal nor an unweighted template, or the merged file
            does not contain exactly the expected processes.
    """

    hsources = []
    for k in ["data", "mc_nom", "mc_varsamp", "mc_varproc"]:
        items = templates[patterns[k]]
        if len(items)==0:
            raise MatchingException("Nothing matched to %s:%s" % (k, patterns[k]))
        hsources += items

    if len(hsources)==0:
        raise ValueError("No histograms matched")

    #Containers for the data-driven QCD templates, one list per variation
    hqcd = NestedDict()
    hqcd["nominal"][None] = []
    for syst in ["yield", "iso"]:
        for sdir in ["up", "down"]:
            hqcd[syst][sdir] = []
    templates_qcd = templates[patterns["data_antiiso"]]

    if len(templates_qcd)==0:
        raise MatchingException("Nothing matched to %s:%s" % ("data_antiiso", patterns["data_antiiso"]))

    def add_nominal_qcd(hist):
        #Register the nominal shape together with its scaled up/down yield variations
        hqcd["nominal"][None].append(hist)
        hup = hist.Clone()
        hdown = hist.Clone()
        hup.Scale(qcd_yield_variations[0])
        hdown.Scale(qcd_yield_variations[1])
        hqcd["yield"]["up"].append(hup)
        hqcd["yield"]["down"].append(hdown)

    for keys, hist in templates_qcd:
        if keys[1].startswith("antiiso"):
            #We have isolation variations
            isodir = keys[1].split("_")[1]
            if isodir=="nominal":
                add_nominal_qcd(hist)
            elif isodir in ["up", "down"]:
                hqcd["iso"][isodir].append(hist)
            else:
                raise ValueError("Undefined isolation variation direction: %s" % isodir)

        #We only have the nominal QCD shape
        elif keys[1]=="weight__unweighted":
            add_nominal_qcd(hist)

            #Placeholders for the isolation variation
            for isodir in ["up", "down"]:
                hqcd["iso"][isodir].append(hist.Clone())
        else:
            raise Exception("Couldn't parse the QCD pattern: %s" % str(keys))

    def map_leaves(di, f, equate=True):
        #Apply f to every non-dict value of a nested dict. With equate=True the
        #value is replaced by f(value), otherwise f is called for its side effect.
        for k, v in di.items():
            if isinstance(v, dict):
                #The flag has to be handed down, otherwise nested leaves
                #would always be overwritten
                map_leaves(v, f, equate)
            elif equate:
                di[k] = f(v)
            else:
                f(v)
        return di

    #Sum the anti-iso data subsamples
    map_leaves(hqcd, lambda li: reduce(lambda x,y: x+y, li))

    #Normalize the isolation variations to the nominal
    map_leaves(hqcd["iso"],
        lambda hi:
            hi.Scale(hqcd["nominal"][None].Integral() / hi.Integral()) if hi.Integral()>0 else 0,
        equate=False
    )

    #Add the variated data-driven QCD templates
    hsources += [
        (("data", "qcd", "weight__unweighted"), hqcd["nominal"][None]),
        (("data", "qcd", "weight__qcd_yield_up"), hqcd["yield"]["up"]),
        (("data", "qcd", "weight__qcd_yield_down"), hqcd["yield"]["down"]),
        (("data", "qcd", "weight__qcd_iso_up"), hqcd["iso"]["up"]),
        (("data", "qcd", "weight__qcd_iso_down"), hqcd["iso"]["down"]),
    ]

    #Sort the histograms as syst_scenarios[sample][systematic][direction]
    syst_scenarios = NestedDict()
    for (sample_var, sample, weight_var), hist in hsources:
        make_hist(hist)

        if ".root" in sample:
            sample = sample[:sample.index(".root")]

        #The weight name is the second "__"-separated field, if there is one
        if "__" in weight_var:
            wn = weight_var.split("__")[1]
        else:
            wn = weight_var
        sample_var = sample_var.lower()

        syst = None

        #Nominal weight, look for variated samples
        if wn=="nominal":
            syst = sample_var
        elif wn=="unweighted":
            syst="unweighted"
        else:
            #Variated weight, use only nominal sample or data in case of data-driven shapes
            if not (sample_var=="nominal" or sample_var=="data"):
                continue
            syst = wn

        if wn==conf.get_nominal_weight() and sample_var=="nominal":
            logger.info("Using %s:%s as nominal sample for %s" % (wn, sample_var, sample))
            syst_scenarios[sample]["nominal"][None] = hist
        #A systematic scenario which has a separate systematic sample
        elif sample_var == "syst":
            try:
                r = get_syst_from_sample_name(sample)
            except Exception as e:
                logger.warning("Unhandled systematic: %s" % str(e))
                r = None
            if not r:
                continue
            sample, systname, d = r
            syst_scenarios[sample][systname][d] = hist
        else:
            logger.debug("Systematically variated weight: %s:%s %s" % (wn, sample_var, sample))
            systname, d = get_updown(syst)
            syst_scenarios[sample][systname][d] = hist

    #NOTE(review): this lookup assumes a W3Jets_exclusive nominal template is
    #always present - confirm that holds for every configuration
    logger.info("histogram W3Jets_exclusive nominal: " + "%f %d" % (
        syst_scenarios["W3Jets_exclusive"]["nominal"][None].Integral(),
        syst_scenarios["W3Jets_exclusive"]["nominal"][None].GetEntries())
    )
    ######################################
    ### Save systematics, fill missing ###
    ######################################

    #########
    # tchan #
    #########
    #T_t_ToLeptons mass_up is missing, take the mass down and flip the difference with the nominal
    mnomt = syst_scenarios["T_t_ToLeptons"]["nominal"][None].Clone()
    mdownt = syst_scenarios["T_t_ToLeptons"]["mass"]["down"].Clone()
    mupt = (mnomt+mnomt-mdownt)
    syst_scenarios["T_t_ToLeptons"]["mass"]["up"] = mupt

    #########
    # TTBar #
    #########
    #TTbar variations are provided for the inclusive only, fill them for the exclusive
    nom_ttbar = syst_scenarios["TTJets_FullLept"]["nominal"][None] + syst_scenarios["TTJets_SemiLept"]["nominal"][None]
    for syst in ["mass", "ttbar_scale", "ttbar_matching"]:
        for sample in ["TTJets_FullLept", "TTJets_SemiLept"]:
            for sd in ["up", "down"]:
                syst_scenarios[sample][syst][sd] = syst_scenarios[sample]["nominal"][None] * syst_scenarios["TTJets"][syst][sd] / nom_ttbar

    syst_scenarios.pop("TTJets")

    syst_scenarios = syst_scenarios.as_dict()

    #Create the output file
    p = os.path.dirname(conf.get_outfile_unmerged())
    if not os.path.exists(p):
        os.makedirs(p)
    of = ROOT.TFile(conf.get_outfile_unmerged() , "RECREATE")
    of.cd()

    #Get the list of all possible systematic scenarios that we have available
    allsysts = get_all_systs(syst_scenarios)
    for sampn, h1 in syst_scenarios.items():

        #Consider all the possible systematic scenarios
        for systname in allsysts:

            #If we have it available, fine, use it
            if systname in h1:
                h2 = h1[systname]

            #If not, in case of MC and a non-trivial variation
            elif not sampn.startswith("Single") and systname not in ["unweighted", "nominal"]:

                #Try to get the unvariated template as a placeholder
                hnom = h1.get("nominal", None)
                if not hnom:
                    hnom = h1.get("unweighted", None)
                if not hnom:
                    raise Exception("Could not get the nominal template for %s:%s" % (sampn, systname))

                #Our convention is that even the unvariated template is a dict with a single
                #key for the direction of variation, which is 'None'
                hnom = hnom[None]

                #Add placeholder templates. Both directions are cloned from the
                #unvariated template itself, never from a previous placeholder.
                for systdir in ["up", "down"]:
                    hmissing = hnom.Clone(hname_encode(conf.varname, sampn, systname, systdir))
                    set_missing_hist(hmissing)

                    #Save to file
                    hmissing.SetDirectory(of)
                    hmissing.Write()
                continue
            else:
                continue

            for systdir, h in h2.items():
                if (systdir is None and systname=="nominal") or not sample_types.is_mc(sampn):
                    h = h.Clone(hname_encode(conf.varname, sampn))
                elif systdir is None and systname=="unweighted":
                    h = h.Clone(hname_encode(conf.varname, sampn, "unweighted"))
                else:
                    h = h.Clone(hname_encode(conf.varname, sampn, systname, systdir))
                h.SetDirectory(of)
                h.Write()
    nkeys = len(of.GetListOfKeys())
    logger.info("Saved %d histograms to file %s" % (nkeys, of.GetPath()))

    ########################
    ### Load systematics ###
    ########################
    #The unmerged file is deliberately left open here: the histograms are read
    #back from the same handle
    hists = dict()
    for k in of.GetListOfKeys():
        hists[k.GetName()] = of.Get(k.GetName())
    logger.info("Loaded %d histograms from file %s" % (len(hists), of.GetPath()))

    ########################
    ###      Merge       ###
    ########################
    from plots.common.utils import merge_hists, PhysicsProcess
    merge_cmds = PhysicsProcess.get_merge_dict(
        PhysicsProcess.get_proc_dict(conf.channel)
    )
    hsysts = NestedDict()
    #Variable name used for the merged histogram names; taken from the last
    #histogram name that was split, as all of them are expected to share it
    #NOTE(review): confirm that every histogram carries the same variable
    varname = None
    for k, v in hists.items():
        spl = split_name(k)
        varname = spl["var"]
        hsysts[spl["type"]][spl["dir"]][spl["sample"]] = v
    hsysts = hsysts.as_dict()

    p = os.path.dirname(conf.get_outfile_merged())
    if not os.path.exists(p):
        os.makedirs(p)
    of = ROOT.TFile(conf.get_outfile_merged(), "RECREATE")
    of.cd()

    for syst, h1 in hsysts.items():
        if syst in skipped_systs:
            continue
        for sdir, h2 in h1.items():
            hmc = merge_hists(h2, merge_cmds)
            for hn, h in hmc.items():
                if syst=="nominal" or syst=="unweighted":
                    h.SetName("__".join([varname, hn]))
                else:
                    h.SetName("__".join([varname, hn, syst, sdir]))
                h.SetDirectory(of)
                h.Write()
    nkeys = len(of.GetListOfKeys())
    logger.info("Saved %d histograms to file %s" % (nkeys, of.GetPath()))
    of.Close()

    #Read the merged file back and make sure it has exactly the expected processes
    hists = load_theta_format(conf.get_outfile_merged())
    processes = set()
    systs = set()
    for (variable, sample, syst, systdir), v in hists.items_flat():
        processes.add(sample)
        systs.add(syst)
    logger.info("Processes: %s" % processes)
    if processes != set(['diboson', 'schan', 'tWchan', 'TTJets', 'tchan', 'WJets', 'qcd', 'DYJets', 'data']):
        raise Exception("Combined file did not contain the necessary processes: %s" % str(processes))
    logger.info("Systematic scenarios: %s" % systs)
Exemplo n.º 3
0
	cut = Cuts.final(2,1)*Cuts.mu
	var = "cos_theta"
	plot_range=[20, -1, 1]
	lumi = lumi_iso["mu"]
	plot_args = {"x_label": "cos #theta"}


	weights = [
	    ("unw", "1.0"),
	    ("nom", "b_weight_nominal"),
	    ("bc_up", "b_weight_nominal_BCup"),
	    ("bc_down", "b_weight_nominal_BCdown"),
	    ("l_up", "b_weight_nominal_Lup"),
	    ("l_down", "b_weight_nominal_Ldown"),
	]
	hists = NestedDict()

	merge_cmds = {
		"ttbar": ["TTJets_.*"],
		"tchan": [".*_ToLeptons"],
		"wjets": ["W[1-4]Jets.*"]
	}

	pretty_names = {
		"ttbar": "t#bar{t}",
		"tchan": "signal (t-channel)",
		"wjets": "W",
	}

	pretty_names_weights = {
		"unw": "unweighted",
Exemplo n.º 4
0
    base = os.path.join(os.environ['STPOL_DIR'], 'qcd_estimation', 'fitted', channel)
    fn = base + '/%s_no_MC_subtraction_mt_%s_plus.txt' % (cut, met)
    fi = open(fn)
    li = fi.readline().strip().split()
    sf = float(li[0])

    #The yield and its uncertainty
    y = float(li[1])
    uncy = float(li[2])

    #Calculate the uncertainty of the scale factor
    unc_sf = uncy/y

    if not do_uncertainty:
        return sf
    else:
        return sf, unc_sf * sf

#One (initially empty) dictionary per lepton channel
qcdScale = {'mu': {}, 'ele': {}}

#Determined as the ratio of the integral of anti-iso data after full selection / full selection minus MVA
#See qcd_scale_factors.ipynb for determination
#Each row is ((first key, second key, lepton channel), scale factor); the
#nested structure is filled in this order and then converted with as_dict().
qcd_cut_SF = NestedDict()
for _keys, _sf in [
    (('mva', 'loose', 'mu'), 0.114754098361),
    (('mva', 'loose', 'ele'), 0.0734908136483),
    (('cutbased', 'final', 'mu'), 0.0983606557377),
    (('cutbased', 'final', 'ele'), 0.0629921259843),
]:
    _method, _selection, _channel = _keys
    qcd_cut_SF[_method][_selection][_channel] = _sf
qcd_cut_SF = qcd_cut_SF.as_dict()