示例#1
0
def combine_templates(templates, patterns, conf):
    """
    Args:
    """

    hists = {}
    hsources = []
    for k in ["data", "mc_nom", "mc_varsamp", "mc_varproc"]:
        items = templates[patterns[k]]
        if len(items)==0:
            raise MatchingException("Nothing matched to %s:%s" % (k, patterns[k]))
        hsources += items

    if len(hsources)==0:
        raise ValueError("No histograms matched")

    hqcd = NestedDict()
    hqcd["nominal"][None] = []
    for syst in ["yield", "iso"]:
        for sdir in ["up", "down"]:
            hqcd[syst][sdir] = []
    templates_qcd = templates[patterns["data_antiiso"]]

    if len(templates_qcd)==0:
        raise MatchingException("Nothing matched to %s:%s" % ("data_antiiso", patterns["data_antiiso"]))

    for keys, hist in templates[patterns["data_antiiso"]]:
        if keys[1].startswith("antiiso"):
            #We have isolation variations
            isodir = keys[1].split("_")[1]
            if isodir=="nominal":
                hqcd["nominal"][None].append(hist)
                hup = hist.Clone()
                hdown = hist.Clone()
                hup.Scale(qcd_yield_variations[0])
                hdown.Scale(qcd_yield_variations[1])
                hqcd["yield"]["up"].append(hup)
                hqcd["yield"]["down"].append(hdown)

            elif isodir in ["up", "down"]:
                hqcd["iso"][isodir].append(hist)
            else:
                raise ValueError("Undefined isolation variation direction: %s" % isodir)

        #We only have the nominal QCD shape
        elif keys[1]=="weight__unweighted":
            hqcd["nominal"][None].append(hist)
            hup = hist.Clone()
            hdown = hist.Clone()
            hup.Scale(qcd_yield_variations[0])
            hdown.Scale(qcd_yield_variations[1])
            hqcd["yield"]["up"].append(hup)
            hqcd["yield"]["down"].append(hdown)

            #Placeholders for the isolation variation
            for isodir in ["up", "down"]:
                h = hist.Clone()
                hqcd["iso"][isodir].append(h)
        else:
            raise Exception("Couldn't parse the QCD pattern: %s" % str(keys))

    def map_leaves(di, f, equate=True):
        for k, v in di.items():
            if isinstance(v, dict):
                map_leaves(v, f)
            else:
                if equate:
                    di[k] = f(v)
                else:
                    f(v)
        return di

    #Sum the anti-iso data subsamples
    map_leaves(hqcd, lambda li: reduce(lambda x,y: x+y, li))

    #Normalize the isolation variations to the nominal
    map_leaves(hqcd["iso"],
        lambda hi:
            hi.Scale(hqcd["nominal"][None].Integral() / hi.Integral()) if hi.Integral()>0 else 0,
        equate=False
    )

    #Add the variated data-driven QCD templates
    hsources += [
        (("data", "qcd", "weight__unweighted"), hqcd["nominal"][None]),
        (("data", "qcd", "weight__qcd_yield_up"), hqcd["yield"]["up"]),
        (("data", "qcd", "weight__qcd_yield_down"), hqcd["yield"]["down"]),
        (("data", "qcd", "weight__qcd_iso_up"), hqcd["iso"]["up"]),
        (("data", "qcd", "weight__qcd_iso_down"), hqcd["iso"]["down"]),
    ]

        #f = open('temp.pickle','wb')
        #pickle.dump(hsources, f)
        #f.close()

    #load the histos from the temporary pickle
    #f = open('temp.pickle','rb')
    #hsources = pickle.load(f)

    syst_scenarios = NestedDict()
    for (sample_var, sample, weight_var), hist in hsources:
        make_hist(hist)
        # if "__ele" in weight_var:
        #     continue

        if ".root" in sample:
            sample = sample[:sample.index(".root")]

        if "__" in weight_var:
            spl = weight_var.split("__")
            wn = spl[1]
        else:
            wn = weight_var
        sample_var = sample_var.lower()
        wtype = None
        wdir = None
        stype = None
        sdir = None

        syst = None

        #Nominal weight, look for variated samples
        if wn=="nominal":
            syst = sample_var
        elif wn=="unweighted":
            syst="unweighted"
        else:
            #Variated weight, use only nominal sample or data in case of data-driven shapes
            if not (sample_var=="nominal" or sample_var=="data"):
                continue
            syst = wn

        if wn==conf.get_nominal_weight() and sample_var=="nominal":
            logger.info("Using %s:%s as nominal sample for %s" % (wn, sample_var, sample))
            syst_scenarios[sample]["nominal"][None] = hist
        #A systematic scenario which has a separate systematic sample
        elif sample_var == "syst":
            try:
                r = get_syst_from_sample_name(sample)
            except Exception as e:
                logger.warning("Unhandled systematic: %s" % str(e))
                r = None
            if not r:
                continue
            sample, systname, d = r
            #sample = map_syst_sample_to_nominal(sample)
            syst_scenarios[sample][systname][d] = hist
        else:
            logger.debug("Systematically variated weight: %s:%s %s" % (wn, sample_var, sample))
            systname, d = get_updown(syst)
            syst_scenarios[sample][systname][d] = hist
    logger.info("histogram W3Jets_exclusive nominal: " + "%f %d" % (
        syst_scenarios["W3Jets_exclusive"]["nominal"][None].Integral(),
        syst_scenarios["W3Jets_exclusive"]["nominal"][None].GetEntries())
    )
    ######################################
    ### Save systematics, fill missing ###
    ######################################

    #########
    # tchan #
    #########
    #T_t_ToLeptons mass_up is missing, take the mass down and flip the difference with the nominal
    mnomt = syst_scenarios["T_t_ToLeptons"]["nominal"][None].Clone()
    mdownt = syst_scenarios["T_t_ToLeptons"]["mass"]["down"].Clone()
    mupt = (mnomt+mnomt-mdownt)
    syst_scenarios["T_t_ToLeptons"]["mass"]["up"] = mupt

    #########
    # TTBar #
    #########
    #TTbar variations are provided for the inclusive only, fill them for the exclusive
    nom_ttbar = syst_scenarios["TTJets_FullLept"]["nominal"][None] + syst_scenarios["TTJets_SemiLept"]["nominal"][None]
    for syst in ["mass", "ttbar_scale", "ttbar_matching"]:
        for sample in ["TTJets_FullLept", "TTJets_SemiLept"]:
            for sd in ["up", "down"]:
                syst_scenarios[sample][syst][sd] = syst_scenarios[sample]["nominal"][None] * syst_scenarios["TTJets"][syst][sd] / nom_ttbar

    syst_scenarios.pop("TTJets")

    syst_scenarios = syst_scenarios.as_dict()

    #Create the output file
    p = os.path.dirname(conf.get_outfile_unmerged())
    if not os.path.exists(p):
        os.makedirs(p)
    of = ROOT.TFile(conf.get_outfile_unmerged() , "RECREATE")
    of.cd()

    #Get the list of all possible systematic scenarios that we have available
    allsysts = get_all_systs(syst_scenarios)
    for sampn, h1 in syst_scenarios.items():

        #Consider all the possible systematic scenarios
        for systname in allsysts:

            #If we have it available, fine, use it
            if systname in h1.keys():
                h2 = h1[systname]

            #If not, in case of MC and a non-trivial variation
            elif not sampn.startswith("Single") and systname not in ["unweighted", "nominal"]:

                #Try to get the unvariated template as a placeholder
                h = h1.get("nominal", None)
                if not h:
                    h = h1.get("unweighted", None)
                if not h:
                    raise Exception("Could not get the nominal template for %s:%s" % (sampn, systname))

                #Our convention is that even the unvariated template is a dict with a single
                #key for the direction of variation, which is 'None'
                h = h[None]

                #Add placeholder templates
                for systdir in ["up", "down"]:
                    h = h.Clone(hname_encode(conf.varname, sampn, systname, systdir))
                    set_missing_hist(h)

                    #Save to file
                    h.SetDirectory(of)
                    h.Write()
                continue
            else:
                continue
            for systdir, h in h2.items():
                if systdir==None and systname=="nominal" or not sample_types.is_mc(sampn):
                    h = h.Clone(hname_encode(conf.varname, sampn))
                elif systdir==None and systname=="unweighted":
                    h = h.Clone(hname_encode(conf.varname, sampn, "unweighted"))
                else:
                    h = h.Clone(hname_encode(conf.varname, sampn, systname, systdir))
                h.SetDirectory(of)
                h.Write()
    nkeys = len(of.GetListOfKeys())
    logger.info("Saved %d histograms to file %s" % (nkeys, of.GetPath()))
#    of.Close()

    ########################
    ### Load systematics ###
    ########################
#    of_unmerged = File(conf.get_outfile_unmerged())
    hists = dict()
#    ROOT.gROOT.cd()
    for k in of.GetListOfKeys():
        hists[k.GetName()] = of.Get(k.GetName())
        h = hists[k.GetName()]
        #hists[k.GetName()].Rebin(2)
    logger.info("Loaded %d histograms from file %s" % (len(hists), of.GetPath()))
    #of_unmerged.Close()

    ########################
    ###      Merge       ###
    ########################
    from plots.common.utils import merge_hists, PhysicsProcess
    merge_cmds = PhysicsProcess.get_merge_dict(
        PhysicsProcess.get_proc_dict(conf.channel)
    )
    hsysts = NestedDict()
    for k, v in hists.items():
        spl = split_name(k)
        hsysts[spl["type"]][spl["dir"]][spl["sample"]] = v
    hsysts = hsysts.as_dict()

    p = os.path.dirname(conf.get_outfile_merged())
    if not os.path.exists(p):
        os.makedirs(p)
    of = ROOT.TFile(conf.get_outfile_merged(), "RECREATE")
    of.cd()

    for syst, h1 in hsysts.items():
        if syst in skipped_systs:
            continue
        for sdir, h2 in h1.items():
            hmc = merge_hists(h2, merge_cmds)
            for hn, h in hmc.items():
                if syst=="nominal" or syst=="unweighted":
                    h.SetName("__".join([spl["var"], hn]))
                else:
                    h.SetName("__".join([spl["var"], hn, syst, sdir]))
                h.SetDirectory(of)
                h.Write()
    nkeys = len(of.GetListOfKeys())
    logger.info("Saved %d histograms to file %s" % (nkeys, of.GetPath()))
    of.Close()

    hists = load_theta_format(conf.get_outfile_merged())
    processes = []
    systs = []
    for (variable, sample, syst, systdir), v in hists.items_flat():
        processes.append(sample)
        systs.append(syst)
    processes = set(processes)
    systs = set(systs)
    logger.info("Processes: %s" % processes)
    if not processes == set(['diboson', 'schan', 'tWchan', 'TTJets', 'tchan', 'WJets', 'qcd', 'DYJets', 'data']):
        raise Exception("Combined file did not contain the necessary processes: %s" % str(processes))
    logger.info("Systematic scenarios: %s" % systs)
示例#2
0
文件: data_mc.py 项目: HEP-KBFI/stpol
def data_mc_plot(samples, plot_def, name, lepton_channel, lumi, weight, physics_processes, use_antiiso=False):

    logger.info('Plot in progress %s' % name)

    merge_cmds = PhysicsProcess.get_merge_dict(physics_processes) #The actual merge dictionary

    var = plot_def['var']

    #Id var is a list/tuple, assume
    if not isinstance(var, basestring):
        try:
            if lepton_channel == 'ele':
                var = var[0]
            elif lepton_channel == 'mu':
                var = var[1]
        except Exception as e:
            logger.error("Plot variable 'var' specification incorrect for multi-variable plot: %s" % str(var))
            raise e
    cut = None
    if lepton_channel == 'ele':
        cut = plot_def['elecut']
    elif lepton_channel == 'mu':
        cut = plot_def['mucut']

    cut_str = str(cut)

    plot_range = plot_def['range']

    do_norm = False
    if 'normalize' in plot_def.keys() and plot_def['normalize']:
        do_norm = True
    hists_mc = dict()
    hists_data = dict()
    for name, sample in samples.items():
        logger.debug("Starting to plot %s" % name)
        if sample.isMC:
            hist = sample.drawHistogram(var, cut_str, weight=str(weight), plot_range=plot_range)
            hist.Scale(sample.lumiScaleFactor(lumi))
            hists_mc[sample.name] = hist
            if do_norm:
                Styling.mc_style_nostack(hists_mc[sample.name], sample.name)
            else:
                Styling.mc_style(hists_mc[sample.name], sample.name)

            if "fitpars" in plot_def.keys():
                rescale_to_fit(sample.name, hist, plot_def["fitpars"][lepton_channel])
        elif "antiiso" in name and plot_def['estQcd'] and not use_antiiso:

            # Make loose template
            #Y U NO LOOP :) -JP
            region = '2j1t'
            if '2j0t' in plot_def['estQcd']: region='2j0t'
            if '3j0t' in plot_def['estQcd']: region='3j0t'
            if '3j1t' in plot_def['estQcd']: region='3j1t'
            if '3j2t' in plot_def['estQcd']: region='3j2t'

            qcd_extra_cut = Cuts.deltaR(0.3)*Cuts.antiiso(lepton_channel)

            #Take the loose template with a good shape from the N-jet, M-tag, post lepton selection region with high statistics
            qcd_loose_cut = cutlist[region]*cutlist['presel_'+lepton_channel]*qcd_extra_cut

            #Take the template which can be correctly normalized from the actual region with inverted isolation cuts
            qcd_cut = cut*qcd_extra_cut

            hist_qcd_loose = sample.drawHistogram(var, str(qcd_loose_cut), weight="1.0", plot_range=plot_range)
            hist_qcd = sample.drawHistogram(var, str(qcd_cut), weight="1.0", plot_range=plot_range)
            logger.debug("Using the QCD scale factor %s: %.2f" % (plot_def['estQcd'], qcdScale[lepton_channel][plot_def['estQcd']]))
            

            hist_qcd.Scale(qcdScale[lepton_channel][plot_def['estQcd']])
            hist_qcd_loose.Scale(hist_qcd.Integral()/hist_qcd_loose.Integral())
            if var=='cos_theta':
                hist_qcd=hist_qcd_loose
            sampn = "QCD"+sample.name

            #Rescale the QCD histogram to the eta_lj fit
            if "fitpars" in plot_def.keys():
                rescale_to_fit(sampn, hist_qcd, plot_def["fitpars"][lepton_channel])

            hists_mc[sampn] = hist_qcd
            hists_mc[sampn].SetTitle('QCD')
            if do_norm:
                Styling.mc_style_nostack(hists_mc[sampn], 'QCD')
            else:
                Styling.mc_style(hists_mc[sampn], 'QCD')

        #Real ordinary data in the isolated region
        elif not "antiiso" in name or use_antiiso:
            hist_data = sample.drawHistogram(var, cut_str, weight="1.0", plot_range=plot_range)
            hist_data.SetTitle('Data')
            Styling.data_style(hist_data)
            hists_data[name] = hist_data


    if len(hists_data.values())==0:
        raise Exception("Couldn't draw the data histogram")

    #Combine the subsamples to physical processes
    hist_data = sum(hists_data.values())
    merge_cmds['QCD']=["QCD"+merge_cmds['data'][0]]
    order=['QCD']+PhysicsProcess.desired_plot_order
    if plot_def['log']:
        order = PhysicsProcess.desired_plot_order_log+['QCD']
    merged_hists = merge_hists(hists_mc, merge_cmds, order=order)

    if hist_data.Integral()<=0:
        logger.error(hists_data)
        logger.error("hist_data.entries = %d" % hist_data.GetEntries())
        logger.error("hist_data.integral = %d" % hist_data.Integral())
        raise Exception("Histogram for data was empty. Something went wrong, please check.")

    if do_norm:
        for k,v in merged_hists.items():
            v.Scale(1./v.Integral())
        hist_data.Scale(1./hist_data.Integral())

    htot = sum(merged_hists.values())

    chi2 = hist_data.Chi2Test(htot, "UW CHI2/NDF")
    if chi2>20:#FIXME: uglyness
        logger.error("The chi2 between data and MC is large (%s, chi2=%.2f). You may have errors with your samples!" %
            (name, chi2)
        )
        logger.info("MC  : %s" % " ".join(map(lambda x: "%.1f" % x, list(htot.y()))))
        logger.info("DATA: %s" % " ".join(map(lambda x: "%.1f" % x, list(hist_data.y()))))
        logger.info("diff: %s" % str(
            " ".join(map(lambda x: "%.1f" % x, numpy.abs(numpy.array(list(htot.y())) - numpy.array(list(hist_data.y())))))
        ))

    merged_hists_l = merged_hists.values()

    PhysicsProcess.name_histograms(physics_processes, merged_hists)

    leg_style = ['p','f']
    if do_norm:
        leg_style=['p','l']
    leg = legend([hist_data] + list(reversed(merged_hists_l)), legend_pos=plot_def['labloc'], styles=leg_style)

    canv = ROOT.TCanvas()

    #Make the stacks
    stacks_d = OrderedDict()
    stacks_d["mc"] = merged_hists_l
    stacks_d["data"] = [hist_data]

    #label
    xlab = plot_def['xlab']
    if not isinstance(xlab, basestring):
        if lepton_channel == 'ele':
            xlab = xlab[0]
        else:
            xlab = xlab[1]
    ylab = 'N / '+str((1.*(plot_range[2]-plot_range[1])/plot_range[0]))
    if plot_def['gev']:
        ylab+=' GeV'
    fact = 1.5
    if plot_def['log']:
        fact = 10

    plow=0.3
    if do_norm:
        plow=0

    #Make a separate pad for the stack plot
    p1 = ROOT.TPad("p1", "p1", 0, plow, 1, 1)
    p1.Draw()
    p1.SetTicks(1, 1);
    p1.SetGrid();
    p1.SetFillStyle(0);
    p1.cd()

    stacks = plot_hists_stacked(p1, stacks_d, x_label=xlab, y_label=ylab, max_bin_mult = fact, do_log_y = plot_def['log'], stack = (not do_norm))

    #Put the the lumi box where the legend is not
    boxloc = 'top-right'
    if plot_def['labloc'] == 'top-right':
        boxloc = 'top-left'
    chan = 'Electron'
    if lepton_channel == "mu":
        chan = 'Muon'

    additional_comments = ""
    if 'cutname' in plot_def.keys():
        additional_comments += ", " + plot_def['cutname'][lepton_channel]
    lbox = lumi_textbox(lumi,
        boxloc,
        'preliminary',
        chan + ' channel' + additional_comments
    )

    #Draw everything
    lbox.Draw()
    leg.Draw()
    canv.Draw()

    #Keep the handles just in case
    canv.PAD1 = p1
    canv.STACKS = stacks
    canv.LEGEND = legend
    canv.LUMIBOX = lbox

    return canv, merged_hists, htot, hist_data